mirror of https://github.com/apache/lucene.git
LUCENE-6276: Added TwoPhaseIterator.matchCost().
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1714261 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
56b0a46f10
commit
0ed54b3105
|
@ -206,6 +206,9 @@ Optimizations
|
|||
* LUCENE-6892: various lucene.index initialCapacity tweaks
|
||||
(Christine Poerschke)
|
||||
|
||||
* LUCENE-6276: Added TwoPhaseIterator.matchCost() which allows confirming the
|
||||
least costly TwoPhaseIterators first. (Paul Elschot via Adrien Grand)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-6817: ComplexPhraseQueryParser.ComplexPhraseQuery does not display
|
||||
|
|
|
@ -155,7 +155,7 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
|||
|
||||
@Override
|
||||
public long cost() {
|
||||
return lead.cost();
|
||||
return lead.cost(); // overestimate
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -164,16 +164,33 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
|||
private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {
|
||||
|
||||
private final TwoPhaseIterator[] twoPhaseIterators;
|
||||
private final float matchCost;
|
||||
|
||||
private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
|
||||
super(new ConjunctionDISI(iterators));
|
||||
assert twoPhaseIterators.size() > 0;
|
||||
|
||||
CollectionUtil.timSort(twoPhaseIterators, new Comparator<TwoPhaseIterator>() {
|
||||
@Override
|
||||
public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) {
|
||||
return Float.compare(o1.matchCost(), o2.matchCost());
|
||||
}
|
||||
});
|
||||
|
||||
this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);
|
||||
|
||||
// Compute the matchCost as the total matchCost of the sub iterators.
|
||||
// TODO: This could be too high because the matching is done cheapest first: give the lower matchCosts a higher weight.
|
||||
float totalMatchCost = 0;
|
||||
for (TwoPhaseIterator tpi : twoPhaseIterators) {
|
||||
totalMatchCost += tpi.matchCost();
|
||||
}
|
||||
matchCost = totalMatchCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) {
|
||||
for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { // match cheapest first
|
||||
if (twoPhaseIterator.matches() == false) {
|
||||
return false;
|
||||
}
|
||||
|
@ -181,6 +198,11 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
|||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return matchCost;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -52,19 +52,25 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
|
||||
@Override
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
boolean hasApproximation = false;
|
||||
float sumMatchCost = 0;
|
||||
long sumApproxCost = 0;
|
||||
|
||||
// Compute matchCost as the average over the matchCost of the subScorers.
|
||||
// This is weighted by the cost, which is an expected number of matching documents.
|
||||
for (DisiWrapper<Scorer> w : subScorers) {
|
||||
if (w.twoPhaseView != null) {
|
||||
hasApproximation = true;
|
||||
break;
|
||||
long costWeight = (w.cost <= 1) ? 1 : w.cost;
|
||||
sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
|
||||
sumApproxCost += costWeight;
|
||||
}
|
||||
}
|
||||
|
||||
if (! hasApproximation) {
|
||||
// none of the sub scorers supports approximations
|
||||
if (sumApproxCost == 0) { // no sub scorer supports approximations
|
||||
return null;
|
||||
}
|
||||
|
||||
final float matchCost = sumMatchCost / sumApproxCost;
|
||||
|
||||
// note it is important to share the same pq as this scorer so that
|
||||
// rebalancing the pq through the approximation will also rebalance
|
||||
// the pq in this scorer.
|
||||
|
@ -105,6 +111,11 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
DisjunctionScorer.this.topScorers = topScorers;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return matchCost;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -44,9 +44,11 @@ final class ExactPhraseScorer extends Scorer {
|
|||
|
||||
private final Similarity.SimScorer docScorer;
|
||||
private final boolean needsScores;
|
||||
private float matchCost;
|
||||
|
||||
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
Similarity.SimScorer docScorer, boolean needsScores) throws IOException {
|
||||
Similarity.SimScorer docScorer, boolean needsScores,
|
||||
float matchCost) throws IOException {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
this.needsScores = needsScores;
|
||||
|
@ -59,6 +61,7 @@ final class ExactPhraseScorer extends Scorer {
|
|||
}
|
||||
conjunction = ConjunctionDISI.intersect(iterators);
|
||||
this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
|
||||
this.matchCost = matchCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -68,6 +71,11 @@ final class ExactPhraseScorer extends Scorer {
|
|||
public boolean matches() throws IOException {
|
||||
return phraseFreq() > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return matchCost;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -189,6 +189,7 @@ public class MultiPhraseQuery extends Query {
|
|||
|
||||
// Reuse single TermsEnum below:
|
||||
final TermsEnum termsEnum = fieldTerms.iterator();
|
||||
float totalMatchCost = 0;
|
||||
|
||||
for (int pos=0; pos<postingsFreqs.length; pos++) {
|
||||
Term[] terms = termArrays.get(pos);
|
||||
|
@ -199,6 +200,7 @@ public class MultiPhraseQuery extends Query {
|
|||
if (termState != null) {
|
||||
termsEnum.seekExact(term.bytes(), termState);
|
||||
postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS));
|
||||
totalMatchCost += PhraseQuery.termPositionsCost(termsEnum);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -222,9 +224,13 @@ public class MultiPhraseQuery extends Query {
|
|||
}
|
||||
|
||||
if (slop == 0) {
|
||||
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores);
|
||||
return new ExactPhraseScorer(this, postingsFreqs,
|
||||
similarity.simScorer(stats, context),
|
||||
needsScores, totalMatchCost);
|
||||
} else {
|
||||
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores);
|
||||
return new SloppyPhraseScorer(this, postingsFreqs, slop,
|
||||
similarity.simScorer(stats, context),
|
||||
needsScores, totalMatchCost);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,8 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
|
@ -405,6 +407,7 @@ public class PhraseQuery extends Query {
|
|||
|
||||
// Reuse single TermsEnum below:
|
||||
final TermsEnum te = fieldTerms.iterator();
|
||||
float totalMatchCost = 0;
|
||||
|
||||
for (int i = 0; i < terms.length; i++) {
|
||||
final Term t = terms[i];
|
||||
|
@ -416,6 +419,7 @@ public class PhraseQuery extends Query {
|
|||
te.seekExact(t.bytes(), state);
|
||||
PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS);
|
||||
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
|
||||
totalMatchCost += termPositionsCost(te);
|
||||
}
|
||||
|
||||
// sort by increasing docFreq order
|
||||
|
@ -424,9 +428,13 @@ public class PhraseQuery extends Query {
|
|||
}
|
||||
|
||||
if (slop == 0) { // optimize exact case
|
||||
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores);
|
||||
return new ExactPhraseScorer(this, postingsFreqs,
|
||||
similarity.simScorer(stats, context),
|
||||
needsScores, totalMatchCost);
|
||||
} else {
|
||||
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores);
|
||||
return new SloppyPhraseScorer(this, postingsFreqs, slop,
|
||||
similarity.simScorer(stats, context),
|
||||
needsScores, totalMatchCost);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -456,6 +464,42 @@ public class PhraseQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
/** A guess of
|
||||
* the average number of simple operations for the initial seek and buffer refill
|
||||
* per document for the positions of a term.
|
||||
* See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}.
|
||||
* <p>
|
||||
* Aside: Instead of being constant this could depend among others on
|
||||
* {@link Lucene50PostingsFormat#BLOCK_SIZE},
|
||||
* {@link TermsEnum#docFreq()},
|
||||
* {@link TermsEnum#totalTermFreq()},
|
||||
* {@link DocIdSetIterator#cost()} (expected number of matching docs),
|
||||
* {@link LeafReader#maxDoc()} (total number of docs in the segment),
|
||||
* and the seek time and block size of the device storing the index.
|
||||
*/
|
||||
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
|
||||
|
||||
/** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}
|
||||
* when no seek or buffer refill is done.
|
||||
*/
|
||||
private static final int TERM_OPS_PER_POS = 7;
|
||||
|
||||
/** Returns an expected cost in simple operations
|
||||
* of processing the occurrences of a term
|
||||
* in a document that contains the term.
|
||||
* This is for use by {@link TwoPhaseIterator#matchCost} implementations.
|
||||
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
|
||||
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
||||
*/
|
||||
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
||||
int docFreq = termsEnum.docFreq();
|
||||
assert docFreq > 0;
|
||||
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
|
||||
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
|
||||
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new PhraseWeight(searcher, needsScores);
|
||||
|
|
|
@ -62,6 +62,11 @@ public abstract class RandomAccessWeight extends ConstantScoreWeight {
|
|||
|
||||
return matchingDocs.get(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 10; // TODO: use some cost of matchingDocs
|
||||
}
|
||||
};
|
||||
|
||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||
|
|
|
@ -149,6 +149,10 @@ class ReqExclScorer extends Scorer {
|
|||
return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return reqTwoPhaseIterator.matchCost(); // TODO: also use cost of exclApproximation.advance()
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,9 +52,11 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
|
||||
private int numMatches;
|
||||
final boolean needsScores;
|
||||
private final float matchCost;
|
||||
|
||||
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
int slop, Similarity.SimScorer docScorer, boolean needsScores) {
|
||||
int slop, Similarity.SimScorer docScorer, boolean needsScores,
|
||||
float matchCost) {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
this.needsScores = needsScores;
|
||||
|
@ -68,6 +70,7 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
|
||||
}
|
||||
conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
|
||||
this.matchCost = matchCost;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -596,6 +599,16 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
sloppyFreq = phraseFreq(); // check for phrase
|
||||
return sloppyFreq != 0F;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return matchCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SloppyPhraseScorer@asTwoPhaseIterator(" + SloppyPhraseScorer.this + ")";
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -84,15 +84,23 @@ public abstract class TwoPhaseIterator {
|
|||
return approximation;
|
||||
}
|
||||
|
||||
/** Return whether the current doc ID that the iterator is on matches. This
|
||||
/** Return whether the current doc ID that {@link #approximation()} is on matches. This
|
||||
* method should only be called when the iterator is positioned -- ie. not
|
||||
* when {@link DocIdSetIterator#docID()} is {@code -1} or
|
||||
* {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
|
||||
public abstract boolean matches() throws IOException;
|
||||
|
||||
/** An estimate of the expected cost to determine that a single document {@link #matches()}.
|
||||
* This can be called before iterating the documents of {@link #approximation()}.
|
||||
* Returns an expected cost in number of simple operations like addition, multiplication,
|
||||
* comparing two numbers and indexing an array.
|
||||
* The returned value must be positive.
|
||||
*/
|
||||
public abstract float matchCost();
|
||||
|
||||
/**
|
||||
* Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
|
||||
* when available * otherwise returns null.
|
||||
* when available, otherwise returns null.
|
||||
*/
|
||||
public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
|
||||
return (iter instanceof Scorer)
|
||||
|
|
|
@ -88,14 +88,34 @@ abstract class ConjunctionSpans extends Spans {
|
|||
*/
|
||||
@Override
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
TwoPhaseIterator res = new TwoPhaseIterator(conjunction) {
|
||||
float totalMatchCost = 0;
|
||||
// Compute the matchCost as the total matchCost/positionsCost of the sub spans.
|
||||
for (Spans spans : subSpans) {
|
||||
TwoPhaseIterator tpi = spans.asTwoPhaseIterator();
|
||||
if (tpi != null) {
|
||||
totalMatchCost += tpi.matchCost();
|
||||
} else {
|
||||
totalMatchCost += spans.positionsCost();
|
||||
}
|
||||
}
|
||||
final float matchCost = totalMatchCost;
|
||||
|
||||
return new TwoPhaseIterator(conjunction) {
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
return twoPhaseCurrentDocMatches();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return matchCost;
|
||||
}
|
||||
};
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null here.
|
||||
}
|
||||
|
||||
public Spans[] getSubSpans() {
|
||||
|
|
|
@ -142,6 +142,16 @@ public abstract class FilterSpans extends Spans {
|
|||
public boolean matches() throws IOException {
|
||||
return inner.matches() && twoPhaseCurrentDocMatches();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return inner.matchCost(); // underestimate
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "FilterSpans@asTwoPhaseIterator(inner=" + inner + ", in=" + in + ")";
|
||||
}
|
||||
};
|
||||
} else {
|
||||
// wrapped instance has no approximation, but
|
||||
|
@ -151,10 +161,25 @@ public abstract class FilterSpans extends Spans {
|
|||
public boolean matches() throws IOException {
|
||||
return twoPhaseCurrentDocMatches();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return in.positionsCost(); // overestimate
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "FilterSpans@asTwoPhaseIterator(in=" + in + ")";
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the current document matches.
|
||||
* <p>
|
||||
|
|
|
@ -133,6 +133,11 @@ public class NearSpansUnordered extends ConjunctionSpans {
|
|||
return in.asTwoPhaseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
return in.positionsCost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return in.docID();
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
|
||||
/**
|
||||
* A Spans that wraps another Spans with a different SimScorer
|
||||
|
@ -82,4 +83,14 @@ public class ScoringWrapperSpans extends Spans {
|
|||
public long cost() {
|
||||
return in.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
return in.asTwoPhaseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
return in.positionsCost();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -384,6 +384,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
|||
public long cost() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -210,26 +210,58 @@ public final class SpanOrQuery extends SpanQuery {
|
|||
|
||||
@Override
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
boolean hasApproximation = false;
|
||||
float sumMatchCost = 0; // See also DisjunctionScorer.asTwoPhaseIterator()
|
||||
long sumApproxCost = 0;
|
||||
|
||||
for (DisiWrapper<Spans> w : byDocQueue) {
|
||||
if (w.twoPhaseView != null) {
|
||||
hasApproximation = true;
|
||||
break;
|
||||
long costWeight = (w.cost <= 1) ? 1 : w.cost;
|
||||
sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
|
||||
sumApproxCost += costWeight;
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasApproximation) { // none of the sub spans supports approximations
|
||||
if (sumApproxCost == 0) { // no sub spans supports approximations
|
||||
computePositionsCost();
|
||||
return null;
|
||||
}
|
||||
|
||||
final float matchCost = sumMatchCost / sumApproxCost;
|
||||
|
||||
return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
return twoPhaseCurrentDocMatches();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return matchCost;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
float positionsCost = -1;
|
||||
|
||||
void computePositionsCost() {
|
||||
float sumPositionsCost = 0;
|
||||
long sumCost = 0;
|
||||
for (DisiWrapper<Spans> w : byDocQueue) {
|
||||
long costWeight = (w.cost <= 1) ? 1 : w.cost;
|
||||
sumPositionsCost += w.iterator.positionsCost() * costWeight;
|
||||
sumCost += costWeight;
|
||||
}
|
||||
positionsCost = sumPositionsCost / sumCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
// This may be called when asTwoPhaseIterator returned null,
|
||||
// which happens when none of the sub spans supports approximations.
|
||||
assert positionsCost > 0;
|
||||
return positionsCost;
|
||||
}
|
||||
|
||||
int lastDocTwoPhaseMatched = -1;
|
||||
|
||||
boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||
|
|
|
@ -117,10 +117,40 @@ public class SpanTermQuery extends SpanQuery {
|
|||
termsEnum.seekExact(term.bytes(), state);
|
||||
|
||||
final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings());
|
||||
return new TermSpans(this, getSimScorer(context), postings, term);
|
||||
float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
|
||||
return new TermSpans(this, getSimScorer(context), postings, term, positionsCost);
|
||||
}
|
||||
}
|
||||
|
||||
/** A guess of
|
||||
* the relative cost of dealing with the term positions
|
||||
* when using a SpanNearQuery instead of a PhraseQuery.
|
||||
*/
|
||||
private static final float PHRASE_TO_SPAN_TERM_POSITIONS_COST = 4.0f;
|
||||
|
||||
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
|
||||
|
||||
private static final int TERM_OPS_PER_POS = 7;
|
||||
|
||||
/** Returns an expected cost in simple operations
|
||||
* of processing the occurrences of a term
|
||||
* in a document that contains the term.
|
||||
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
|
||||
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
||||
* <p>
|
||||
* This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
|
||||
* <br>
|
||||
* TODO: keep only a single copy of this method and the constants used in it
|
||||
* when SpanTermQuery moves to the o.a.l.search package.
|
||||
*/
|
||||
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
||||
int docFreq = termsEnum.docFreq();
|
||||
assert docFreq > 0;
|
||||
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
|
||||
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
|
||||
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
|
|
|
@ -86,6 +86,17 @@ public abstract class Spans extends Scorer {
|
|||
*/
|
||||
public abstract void collect(SpanCollector collector) throws IOException;
|
||||
|
||||
/**
|
||||
* Return an estimation of the cost of using the positions of
|
||||
* this {@link Spans} for any single document, but only after
|
||||
* {@link #asTwoPhaseIterator} returned {@code null}.
|
||||
* Otherwise this method should not be called.
|
||||
* The returned value is independent of the current document.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract float positionsCost();
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
|
|
@ -37,13 +37,17 @@ public class TermSpans extends Spans {
|
|||
protected int count;
|
||||
protected int position;
|
||||
protected boolean readPayload;
|
||||
private final float positionsCost;
|
||||
|
||||
public TermSpans(SpanWeight weight, Similarity.SimScorer scorer, PostingsEnum postings, Term term) {
|
||||
public TermSpans(SpanWeight weight, Similarity.SimScorer scorer,
|
||||
PostingsEnum postings, Term term, float positionsCost) {
|
||||
super(weight, scorer);
|
||||
this.postings = Objects.requireNonNull(postings);
|
||||
this.term = Objects.requireNonNull(term);
|
||||
this.doc = -1;
|
||||
this.position = -1;
|
||||
assert positionsCost > 0; // otherwise the TermSpans should not be created.
|
||||
this.positionsCost = positionsCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -118,6 +122,11 @@ public class TermSpans extends Spans {
|
|||
collector.collectLeaf(postings, position, term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
return positionsCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "spans(" + term.toString() + ")@" +
|
||||
|
@ -128,5 +137,4 @@ public class TermSpans extends Spans {
|
|||
public PostingsEnum getPostings() {
|
||||
return postings;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -37,6 +37,11 @@ public class TestConjunctionDISI extends LuceneTestCase {
|
|||
public boolean matches() throws IOException {
|
||||
return confirmed.get(iterator.docID());
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 5; // #operations in FixedBitSet#get()
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -82,6 +82,11 @@ final class JustCompileSearchSpans {
|
|||
public long cost() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
}
|
||||
|
||||
static final class JustCompileSpanQuery extends SpanQuery {
|
||||
|
|
|
@ -176,6 +176,11 @@ public final class DoubleRange extends Range {
|
|||
public boolean matches() throws IOException {
|
||||
return range.accept(values.doubleVal(approximation.docID()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 100; // TODO: use cost of range.accept()
|
||||
}
|
||||
};
|
||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||
}
|
||||
|
|
|
@ -168,6 +168,11 @@ public final class LongRange extends Range {
|
|||
public boolean matches() throws IOException {
|
||||
return range.accept(values.longVal(approximation.docID()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 100; // TODO: use cost of range.accept()
|
||||
}
|
||||
};
|
||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||
}
|
||||
|
|
|
@ -184,6 +184,11 @@ final class GlobalOrdinalsQuery extends Query {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -225,6 +230,11 @@ final class GlobalOrdinalsQuery extends Query {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -211,6 +211,10 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
|
|||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 100; // TODO: use cost of values.getOrd() and collector.score()
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -253,6 +257,11 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 100; // TODO: use cost of values.getOrd() and collector.score()
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,6 +53,11 @@ public abstract class ValueSourceScorer extends Scorer {
|
|||
public boolean matches() throws IOException {
|
||||
return ValueSourceScorer.this.matches(docID());
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 100; // TODO: use cost of ValueSourceScorer.this.matches()
|
||||
}
|
||||
};
|
||||
this.disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
|
||||
}
|
||||
|
|
|
@ -274,6 +274,11 @@ public class PayloadScoreQuery extends SpanQuery {
|
|||
public long cost() {
|
||||
return in.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
return in.positionsCost();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -108,6 +108,11 @@ public class CompositeVerifyQuery extends Query {
|
|||
public boolean matches() throws IOException {
|
||||
return predFuncValues.boolVal(indexQueryScorer.docID());
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 100; // TODO: use cost of predFuncValues.boolVal()
|
||||
}
|
||||
};
|
||||
|
||||
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
|
||||
|
|
|
@ -130,6 +130,11 @@ public class IntersectsRPTVerifyQuery extends Query {
|
|||
|
||||
return predFuncValues.boolVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 100; // TODO: use cost of exactIterator.advance() and predFuncValues.boolVal()
|
||||
}
|
||||
};
|
||||
|
||||
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
|
||||
|
|
|
@ -195,6 +195,19 @@ public class AssertingScorer extends Scorer {
|
|||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
float matchCost = in.matchCost();
|
||||
assert ! Float.isNaN(matchCost);
|
||||
assert matchCost >= 0;
|
||||
return matchCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "AssertingScorer@asTwoPhaseIterator(" + in + ")";
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,16 +1,5 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -28,6 +17,16 @@ import org.apache.lucene.util.Bits;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
/**
|
||||
* A {@link Query} that adds random approximations to its scorers.
|
||||
*/
|
||||
|
@ -172,10 +171,12 @@ public class RandomApproximationQuery extends Query {
|
|||
|
||||
private final DocIdSetIterator disi;
|
||||
private int lastDoc = -1;
|
||||
private final float randomMatchCost;
|
||||
|
||||
RandomTwoPhaseView(Random random, DocIdSetIterator disi) {
|
||||
super(new RandomApproximation(random, disi));
|
||||
this.disi = disi;
|
||||
this.randomMatchCost = random.nextFloat() * 200; // between 0 and 200
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -190,6 +191,10 @@ public class RandomApproximationQuery extends Query {
|
|||
return approximation.docID() == disi.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return randomMatchCost;
|
||||
}
|
||||
}
|
||||
|
||||
private static class RandomApproximation extends DocIdSetIterator {
|
||||
|
|
|
@ -190,6 +190,14 @@ class AssertingSpans extends Spans {
|
|||
return in.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float positionsCost() {
|
||||
float cost = in.positionsCost();
|
||||
assert ! Float.isNaN(cost) : "positionsCost() should not be NaN";
|
||||
assert cost > 0 : "positionsCost() must be positive";
|
||||
return cost;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected float scoreCurrentDoc() throws IOException {
|
||||
assert in.docScorer != null : in.getClass() + " has no docScorer!";
|
||||
|
@ -229,6 +237,18 @@ class AssertingSpans extends Spans {
|
|||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
float cost = in.matchCost();
|
||||
if (Float.isNaN(cost)) {
|
||||
throw new AssertionError("matchCost()=" + cost + " should not be NaN on doc ID " + approximation.docID());
|
||||
}
|
||||
if (cost < 0) {
|
||||
throw new AssertionError("matchCost()=" + cost + " should be non negative on doc ID " + approximation.docID());
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
}
|
||||
|
||||
class AssertingDISI extends DocIdSetIterator {
|
||||
|
|
|
@ -129,6 +129,11 @@ public abstract class Filter extends Query {
|
|||
public boolean matches() throws IOException {
|
||||
return bits.get(approximation.docID());
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 10; // TODO use cost of bits.get()
|
||||
}
|
||||
};
|
||||
return new ConstantScoreScorer(this, 0f, twoPhase);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue