mirror of https://github.com/apache/lucene.git
LUCENE-6276: Added TwoPhaseIterator.matchCost().
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1714261 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
56b0a46f10
commit
0ed54b3105
|
@ -206,6 +206,9 @@ Optimizations
|
||||||
* LUCENE-6892: various lucene.index initialCapacity tweaks
|
* LUCENE-6892: various lucene.index initialCapacity tweaks
|
||||||
(Christine Poerschke)
|
(Christine Poerschke)
|
||||||
|
|
||||||
|
* LUCENE-6276: Added TwoPhaseIterator.matchCost() which allows confirming the
|
||||||
|
least costly TwoPhaseIterators first. (Paul Elschot via Adrien Grand)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-6817: ComplexPhraseQueryParser.ComplexPhraseQuery does not display
|
* LUCENE-6817: ComplexPhraseQueryParser.ComplexPhraseQuery does not display
|
||||||
|
|
|
@ -155,7 +155,7 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long cost() {
|
public long cost() {
|
||||||
return lead.cost();
|
return lead.cost(); // overestimate
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -164,16 +164,33 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
||||||
private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {
|
private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {
|
||||||
|
|
||||||
private final TwoPhaseIterator[] twoPhaseIterators;
|
private final TwoPhaseIterator[] twoPhaseIterators;
|
||||||
|
private final float matchCost;
|
||||||
|
|
||||||
private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
|
private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
|
||||||
super(new ConjunctionDISI(iterators));
|
super(new ConjunctionDISI(iterators));
|
||||||
assert twoPhaseIterators.size() > 0;
|
assert twoPhaseIterators.size() > 0;
|
||||||
|
|
||||||
|
CollectionUtil.timSort(twoPhaseIterators, new Comparator<TwoPhaseIterator>() {
|
||||||
|
@Override
|
||||||
|
public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) {
|
||||||
|
return Float.compare(o1.matchCost(), o2.matchCost());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);
|
this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);
|
||||||
|
|
||||||
|
// Compute the matchCost as the total matchCost of the sub iterators.
|
||||||
|
// TODO: This could be too high because the matching is done cheapest first: give the lower matchCosts a higher weight.
|
||||||
|
float totalMatchCost = 0;
|
||||||
|
for (TwoPhaseIterator tpi : twoPhaseIterators) {
|
||||||
|
totalMatchCost += tpi.matchCost();
|
||||||
|
}
|
||||||
|
matchCost = totalMatchCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) {
|
for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { // match cheapest first
|
||||||
if (twoPhaseIterator.matches() == false) {
|
if (twoPhaseIterator.matches() == false) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -181,6 +198,11 @@ public class ConjunctionDISI extends DocIdSetIterator {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return matchCost;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -52,19 +52,25 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
boolean hasApproximation = false;
|
float sumMatchCost = 0;
|
||||||
|
long sumApproxCost = 0;
|
||||||
|
|
||||||
|
// Compute matchCost as the average over the matchCost of the subScorers.
|
||||||
|
// This is weighted by the cost, which is an expected number of matching documents.
|
||||||
for (DisiWrapper<Scorer> w : subScorers) {
|
for (DisiWrapper<Scorer> w : subScorers) {
|
||||||
if (w.twoPhaseView != null) {
|
if (w.twoPhaseView != null) {
|
||||||
hasApproximation = true;
|
long costWeight = (w.cost <= 1) ? 1 : w.cost;
|
||||||
break;
|
sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
|
||||||
|
sumApproxCost += costWeight;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (! hasApproximation) {
|
if (sumApproxCost == 0) { // no sub scorer supports approximations
|
||||||
// none of the sub scorers supports approximations
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final float matchCost = sumMatchCost / sumApproxCost;
|
||||||
|
|
||||||
// note it is important to share the same pq as this scorer so that
|
// note it is important to share the same pq as this scorer so that
|
||||||
// rebalancing the pq through the approximation will also rebalance
|
// rebalancing the pq through the approximation will also rebalance
|
||||||
// the pq in this scorer.
|
// the pq in this scorer.
|
||||||
|
@ -105,6 +111,11 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
DisjunctionScorer.this.topScorers = topScorers;
|
DisjunctionScorer.this.topScorers = topScorers;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return matchCost;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,9 +44,11 @@ final class ExactPhraseScorer extends Scorer {
|
||||||
|
|
||||||
private final Similarity.SimScorer docScorer;
|
private final Similarity.SimScorer docScorer;
|
||||||
private final boolean needsScores;
|
private final boolean needsScores;
|
||||||
|
private float matchCost;
|
||||||
|
|
||||||
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||||
Similarity.SimScorer docScorer, boolean needsScores) throws IOException {
|
Similarity.SimScorer docScorer, boolean needsScores,
|
||||||
|
float matchCost) throws IOException {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.docScorer = docScorer;
|
this.docScorer = docScorer;
|
||||||
this.needsScores = needsScores;
|
this.needsScores = needsScores;
|
||||||
|
@ -59,6 +61,7 @@ final class ExactPhraseScorer extends Scorer {
|
||||||
}
|
}
|
||||||
conjunction = ConjunctionDISI.intersect(iterators);
|
conjunction = ConjunctionDISI.intersect(iterators);
|
||||||
this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
|
this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
|
||||||
|
this.matchCost = matchCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -68,6 +71,11 @@ final class ExactPhraseScorer extends Scorer {
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return phraseFreq() > 0;
|
return phraseFreq() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return matchCost;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -189,6 +189,7 @@ public class MultiPhraseQuery extends Query {
|
||||||
|
|
||||||
// Reuse single TermsEnum below:
|
// Reuse single TermsEnum below:
|
||||||
final TermsEnum termsEnum = fieldTerms.iterator();
|
final TermsEnum termsEnum = fieldTerms.iterator();
|
||||||
|
float totalMatchCost = 0;
|
||||||
|
|
||||||
for (int pos=0; pos<postingsFreqs.length; pos++) {
|
for (int pos=0; pos<postingsFreqs.length; pos++) {
|
||||||
Term[] terms = termArrays.get(pos);
|
Term[] terms = termArrays.get(pos);
|
||||||
|
@ -199,6 +200,7 @@ public class MultiPhraseQuery extends Query {
|
||||||
if (termState != null) {
|
if (termState != null) {
|
||||||
termsEnum.seekExact(term.bytes(), termState);
|
termsEnum.seekExact(term.bytes(), termState);
|
||||||
postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS));
|
postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS));
|
||||||
|
totalMatchCost += PhraseQuery.termPositionsCost(termsEnum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,9 +224,13 @@ public class MultiPhraseQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (slop == 0) {
|
if (slop == 0) {
|
||||||
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores);
|
return new ExactPhraseScorer(this, postingsFreqs,
|
||||||
|
similarity.simScorer(stats, context),
|
||||||
|
needsScores, totalMatchCost);
|
||||||
} else {
|
} else {
|
||||||
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores);
|
return new SloppyPhraseScorer(this, postingsFreqs, slop,
|
||||||
|
similarity.simScorer(stats, context),
|
||||||
|
needsScores, totalMatchCost);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,8 @@ import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexReaderContext;
|
import org.apache.lucene.index.IndexReaderContext;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
@ -405,6 +407,7 @@ public class PhraseQuery extends Query {
|
||||||
|
|
||||||
// Reuse single TermsEnum below:
|
// Reuse single TermsEnum below:
|
||||||
final TermsEnum te = fieldTerms.iterator();
|
final TermsEnum te = fieldTerms.iterator();
|
||||||
|
float totalMatchCost = 0;
|
||||||
|
|
||||||
for (int i = 0; i < terms.length; i++) {
|
for (int i = 0; i < terms.length; i++) {
|
||||||
final Term t = terms[i];
|
final Term t = terms[i];
|
||||||
|
@ -416,6 +419,7 @@ public class PhraseQuery extends Query {
|
||||||
te.seekExact(t.bytes(), state);
|
te.seekExact(t.bytes(), state);
|
||||||
PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS);
|
PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS);
|
||||||
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
|
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
|
||||||
|
totalMatchCost += termPositionsCost(te);
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort by increasing docFreq order
|
// sort by increasing docFreq order
|
||||||
|
@ -424,9 +428,13 @@ public class PhraseQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (slop == 0) { // optimize exact case
|
if (slop == 0) { // optimize exact case
|
||||||
return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores);
|
return new ExactPhraseScorer(this, postingsFreqs,
|
||||||
|
similarity.simScorer(stats, context),
|
||||||
|
needsScores, totalMatchCost);
|
||||||
} else {
|
} else {
|
||||||
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores);
|
return new SloppyPhraseScorer(this, postingsFreqs, slop,
|
||||||
|
similarity.simScorer(stats, context),
|
||||||
|
needsScores, totalMatchCost);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -456,6 +464,42 @@ public class PhraseQuery extends Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** A guess of
|
||||||
|
* the average number of simple operations for the initial seek and buffer refill
|
||||||
|
* per document for the positions of a term.
|
||||||
|
* See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}.
|
||||||
|
* <p>
|
||||||
|
* Aside: Instead of being constant this could depend among others on
|
||||||
|
* {@link Lucene50PostingsFormat#BLOCK_SIZE},
|
||||||
|
* {@link TermsEnum#docFreq()},
|
||||||
|
* {@link TermsEnum#totalTermFreq()},
|
||||||
|
* {@link DocIdSetIterator#cost()} (expected number of matching docs),
|
||||||
|
* {@link LeafReader#maxDoc()} (total number of docs in the segment),
|
||||||
|
* and the seek time and block size of the device storing the index.
|
||||||
|
*/
|
||||||
|
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
|
||||||
|
|
||||||
|
/** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}
|
||||||
|
* when no seek or buffer refill is done.
|
||||||
|
*/
|
||||||
|
private static final int TERM_OPS_PER_POS = 7;
|
||||||
|
|
||||||
|
/** Returns an expected cost in simple operations
|
||||||
|
* of processing the occurrences of a term
|
||||||
|
* in a document that contains the term.
|
||||||
|
* This is for use by {@link TwoPhaseIterator#matchCost} implementations.
|
||||||
|
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
|
||||||
|
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
||||||
|
*/
|
||||||
|
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
||||||
|
int docFreq = termsEnum.docFreq();
|
||||||
|
assert docFreq > 0;
|
||||||
|
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
|
||||||
|
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
|
||||||
|
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||||
return new PhraseWeight(searcher, needsScores);
|
return new PhraseWeight(searcher, needsScores);
|
||||||
|
|
|
@ -62,6 +62,11 @@ public abstract class RandomAccessWeight extends ConstantScoreWeight {
|
||||||
|
|
||||||
return matchingDocs.get(doc);
|
return matchingDocs.get(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 10; // TODO: use some cost of matchingDocs
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||||
|
|
|
@ -149,6 +149,10 @@ class ReqExclScorer extends Scorer {
|
||||||
return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator);
|
return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return reqTwoPhaseIterator.matchCost(); // TODO: also use cost of exclApproximation.advance()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,9 +52,11 @@ final class SloppyPhraseScorer extends Scorer {
|
||||||
|
|
||||||
private int numMatches;
|
private int numMatches;
|
||||||
final boolean needsScores;
|
final boolean needsScores;
|
||||||
|
private final float matchCost;
|
||||||
|
|
||||||
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||||
int slop, Similarity.SimScorer docScorer, boolean needsScores) {
|
int slop, Similarity.SimScorer docScorer, boolean needsScores,
|
||||||
|
float matchCost) {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.docScorer = docScorer;
|
this.docScorer = docScorer;
|
||||||
this.needsScores = needsScores;
|
this.needsScores = needsScores;
|
||||||
|
@ -68,6 +70,7 @@ final class SloppyPhraseScorer extends Scorer {
|
||||||
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
|
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
|
||||||
}
|
}
|
||||||
conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
|
conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
|
||||||
|
this.matchCost = matchCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -596,6 +599,16 @@ final class SloppyPhraseScorer extends Scorer {
|
||||||
sloppyFreq = phraseFreq(); // check for phrase
|
sloppyFreq = phraseFreq(); // check for phrase
|
||||||
return sloppyFreq != 0F;
|
return sloppyFreq != 0F;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return matchCost;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "SloppyPhraseScorer@asTwoPhaseIterator(" + SloppyPhraseScorer.this + ")";
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,15 +84,23 @@ public abstract class TwoPhaseIterator {
|
||||||
return approximation;
|
return approximation;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return whether the current doc ID that the iterator is on matches. This
|
/** Return whether the current doc ID that {@link #approximation()} is on matches. This
|
||||||
* method should only be called when the iterator is positioned -- ie. not
|
* method should only be called when the iterator is positioned -- ie. not
|
||||||
* when {@link DocIdSetIterator#docID()} is {@code -1} or
|
* when {@link DocIdSetIterator#docID()} is {@code -1} or
|
||||||
* {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
|
* {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
|
||||||
public abstract boolean matches() throws IOException;
|
public abstract boolean matches() throws IOException;
|
||||||
|
|
||||||
|
/** An estimate of the expected cost to determine that a single document {@link #matches()}.
|
||||||
|
* This can be called before iterating the documents of {@link #approximation()}.
|
||||||
|
* Returns an expected cost in number of simple operations like addition, multiplication,
|
||||||
|
* comparing two numbers and indexing an array.
|
||||||
|
* The returned value must be positive.
|
||||||
|
*/
|
||||||
|
public abstract float matchCost();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
|
* Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
|
||||||
* when available * otherwise returns null.
|
* when available, otherwise returns null.
|
||||||
*/
|
*/
|
||||||
public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
|
public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
|
||||||
return (iter instanceof Scorer)
|
return (iter instanceof Scorer)
|
||||||
|
|
|
@ -88,14 +88,34 @@ abstract class ConjunctionSpans extends Spans {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
TwoPhaseIterator res = new TwoPhaseIterator(conjunction) {
|
float totalMatchCost = 0;
|
||||||
|
// Compute the matchCost as the total matchCost/positionsCost of the sub spans.
|
||||||
|
for (Spans spans : subSpans) {
|
||||||
|
TwoPhaseIterator tpi = spans.asTwoPhaseIterator();
|
||||||
|
if (tpi != null) {
|
||||||
|
totalMatchCost += tpi.matchCost();
|
||||||
|
} else {
|
||||||
|
totalMatchCost += spans.positionsCost();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
final float matchCost = totalMatchCost;
|
||||||
|
|
||||||
|
return new TwoPhaseIterator(conjunction) {
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return twoPhaseCurrentDocMatches();
|
return twoPhaseCurrentDocMatches();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return matchCost;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
return res;
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null here.
|
||||||
}
|
}
|
||||||
|
|
||||||
public Spans[] getSubSpans() {
|
public Spans[] getSubSpans() {
|
||||||
|
|
|
@ -142,6 +142,16 @@ public abstract class FilterSpans extends Spans {
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return inner.matches() && twoPhaseCurrentDocMatches();
|
return inner.matches() && twoPhaseCurrentDocMatches();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return inner.matchCost(); // underestimate
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "FilterSpans@asTwoPhaseIterator(inner=" + inner + ", in=" + in + ")";
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
// wrapped instance has no approximation, but
|
// wrapped instance has no approximation, but
|
||||||
|
@ -151,10 +161,25 @@ public abstract class FilterSpans extends Spans {
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return twoPhaseCurrentDocMatches();
|
return twoPhaseCurrentDocMatches();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return in.positionsCost(); // overestimate
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "FilterSpans@asTwoPhaseIterator(in=" + in + ")";
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the current document matches.
|
* Returns true if the current document matches.
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
@ -133,6 +133,11 @@ public class NearSpansUnordered extends ConjunctionSpans {
|
||||||
return in.asTwoPhaseIterator();
|
return in.asTwoPhaseIterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
return in.positionsCost();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int docID() {
|
public int docID() {
|
||||||
return in.docID();
|
return in.docID();
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.search.TwoPhaseIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A Spans that wraps another Spans with a different SimScorer
|
* A Spans that wraps another Spans with a different SimScorer
|
||||||
|
@ -82,4 +83,14 @@ public class ScoringWrapperSpans extends Spans {
|
||||||
public long cost() {
|
public long cost() {
|
||||||
return in.cost();
|
return in.cost();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
|
return in.asTwoPhaseIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
return in.positionsCost();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -384,6 +384,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
||||||
public long cost() {
|
public long cost() {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -210,26 +210,58 @@ public final class SpanOrQuery extends SpanQuery {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
boolean hasApproximation = false;
|
float sumMatchCost = 0; // See also DisjunctionScorer.asTwoPhaseIterator()
|
||||||
|
long sumApproxCost = 0;
|
||||||
|
|
||||||
for (DisiWrapper<Spans> w : byDocQueue) {
|
for (DisiWrapper<Spans> w : byDocQueue) {
|
||||||
if (w.twoPhaseView != null) {
|
if (w.twoPhaseView != null) {
|
||||||
hasApproximation = true;
|
long costWeight = (w.cost <= 1) ? 1 : w.cost;
|
||||||
break;
|
sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
|
||||||
|
sumApproxCost += costWeight;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!hasApproximation) { // none of the sub spans supports approximations
|
if (sumApproxCost == 0) { // no sub spans supports approximations
|
||||||
|
computePositionsCost();
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final float matchCost = sumMatchCost / sumApproxCost;
|
||||||
|
|
||||||
return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
|
return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return twoPhaseCurrentDocMatches();
|
return twoPhaseCurrentDocMatches();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return matchCost;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float positionsCost = -1;
|
||||||
|
|
||||||
|
void computePositionsCost() {
|
||||||
|
float sumPositionsCost = 0;
|
||||||
|
long sumCost = 0;
|
||||||
|
for (DisiWrapper<Spans> w : byDocQueue) {
|
||||||
|
long costWeight = (w.cost <= 1) ? 1 : w.cost;
|
||||||
|
sumPositionsCost += w.iterator.positionsCost() * costWeight;
|
||||||
|
sumCost += costWeight;
|
||||||
|
}
|
||||||
|
positionsCost = sumPositionsCost / sumCost;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
// This may be called when asTwoPhaseIterator returned null,
|
||||||
|
// which happens when none of the sub spans supports approximations.
|
||||||
|
assert positionsCost > 0;
|
||||||
|
return positionsCost;
|
||||||
|
}
|
||||||
|
|
||||||
int lastDocTwoPhaseMatched = -1;
|
int lastDocTwoPhaseMatched = -1;
|
||||||
|
|
||||||
boolean twoPhaseCurrentDocMatches() throws IOException {
|
boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||||
|
|
|
@ -117,10 +117,40 @@ public class SpanTermQuery extends SpanQuery {
|
||||||
termsEnum.seekExact(term.bytes(), state);
|
termsEnum.seekExact(term.bytes(), state);
|
||||||
|
|
||||||
final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings());
|
final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings());
|
||||||
return new TermSpans(this, getSimScorer(context), postings, term);
|
float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
|
||||||
|
return new TermSpans(this, getSimScorer(context), postings, term, positionsCost);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** A guess of
|
||||||
|
* the relative cost of dealing with the term positions
|
||||||
|
* when using a SpanNearQuery instead of a PhraseQuery.
|
||||||
|
*/
|
||||||
|
private static final float PHRASE_TO_SPAN_TERM_POSITIONS_COST = 4.0f;
|
||||||
|
|
||||||
|
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
|
||||||
|
|
||||||
|
private static final int TERM_OPS_PER_POS = 7;
|
||||||
|
|
||||||
|
/** Returns an expected cost in simple operations
|
||||||
|
* of processing the occurrences of a term
|
||||||
|
* in a document that contains the term.
|
||||||
|
* <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
|
||||||
|
* @param termsEnum The term is the term at which this TermsEnum is positioned.
|
||||||
|
* <p>
|
||||||
|
* This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
|
||||||
|
* <br>
|
||||||
|
* TODO: keep only a single copy of this method and the constants used in it
|
||||||
|
* when SpanTermQuery moves to the o.a.l.search package.
|
||||||
|
*/
|
||||||
|
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
||||||
|
int docFreq = termsEnum.docFreq();
|
||||||
|
assert docFreq > 0;
|
||||||
|
long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
|
||||||
|
float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
|
||||||
|
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString(String field) {
|
public String toString(String field) {
|
||||||
StringBuilder buffer = new StringBuilder();
|
StringBuilder buffer = new StringBuilder();
|
||||||
|
|
|
@ -86,6 +86,17 @@ public abstract class Spans extends Scorer {
|
||||||
*/
|
*/
|
||||||
public abstract void collect(SpanCollector collector) throws IOException;
|
public abstract void collect(SpanCollector collector) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return an estimation of the cost of using the positions of
|
||||||
|
* this {@link Spans} for any single document, but only after
|
||||||
|
* {@link #asTwoPhaseIterator} returned {@code null}.
|
||||||
|
* Otherwise this method should not be called.
|
||||||
|
* The returned value is independent of the current document.
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public abstract float positionsCost();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
|
|
|
@ -37,13 +37,17 @@ public class TermSpans extends Spans {
|
||||||
protected int count;
|
protected int count;
|
||||||
protected int position;
|
protected int position;
|
||||||
protected boolean readPayload;
|
protected boolean readPayload;
|
||||||
|
private final float positionsCost;
|
||||||
|
|
||||||
public TermSpans(SpanWeight weight, Similarity.SimScorer scorer, PostingsEnum postings, Term term) {
|
public TermSpans(SpanWeight weight, Similarity.SimScorer scorer,
|
||||||
|
PostingsEnum postings, Term term, float positionsCost) {
|
||||||
super(weight, scorer);
|
super(weight, scorer);
|
||||||
this.postings = Objects.requireNonNull(postings);
|
this.postings = Objects.requireNonNull(postings);
|
||||||
this.term = Objects.requireNonNull(term);
|
this.term = Objects.requireNonNull(term);
|
||||||
this.doc = -1;
|
this.doc = -1;
|
||||||
this.position = -1;
|
this.position = -1;
|
||||||
|
assert positionsCost > 0; // otherwise the TermSpans should not be created.
|
||||||
|
this.positionsCost = positionsCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -118,6 +122,11 @@ public class TermSpans extends Spans {
|
||||||
collector.collectLeaf(postings, position, term);
|
collector.collectLeaf(postings, position, term);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
return positionsCost;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "spans(" + term.toString() + ")@" +
|
return "spans(" + term.toString() + ")@" +
|
||||||
|
@ -128,5 +137,4 @@ public class TermSpans extends Spans {
|
||||||
public PostingsEnum getPostings() {
|
public PostingsEnum getPostings() {
|
||||||
return postings;
|
return postings;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,6 +37,11 @@ public class TestConjunctionDISI extends LuceneTestCase {
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return confirmed.get(iterator.docID());
|
return confirmed.get(iterator.docID());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 5; // #operations in FixedBitSet#get()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,11 @@ final class JustCompileSearchSpans {
|
||||||
public long cost() {
|
public long cost() {
|
||||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class JustCompileSpanQuery extends SpanQuery {
|
static final class JustCompileSpanQuery extends SpanQuery {
|
||||||
|
|
|
@ -176,6 +176,11 @@ public final class DoubleRange extends Range {
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return range.accept(values.doubleVal(approximation.docID()));
|
return range.accept(values.doubleVal(approximation.docID()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 100; // TODO: use cost of range.accept()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||||
}
|
}
|
||||||
|
|
|
@ -168,6 +168,11 @@ public final class LongRange extends Range {
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return range.accept(values.longVal(approximation.docID()));
|
return range.accept(values.longVal(approximation.docID()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 100; // TODO: use cost of range.accept()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||||
}
|
}
|
||||||
|
|
|
@ -184,6 +184,11 @@ final class GlobalOrdinalsQuery extends Query {
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -225,6 +230,11 @@ final class GlobalOrdinalsQuery extends Query {
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -211,6 +211,10 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 100; // TODO: use cost of values.getOrd() and collector.score()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -253,6 +257,11 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 100; // TODO: use cost of values.getOrd() and collector.score()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,6 +53,11 @@ public abstract class ValueSourceScorer extends Scorer {
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return ValueSourceScorer.this.matches(docID());
|
return ValueSourceScorer.this.matches(docID());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 100; // TODO: use cost of ValueSourceScorer.this.matches()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
this.disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
|
this.disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
|
||||||
}
|
}
|
||||||
|
|
|
@ -274,6 +274,11 @@ public class PayloadScoreQuery extends SpanQuery {
|
||||||
public long cost() {
|
public long cost() {
|
||||||
return in.cost();
|
return in.cost();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
return in.positionsCost();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -108,6 +108,11 @@ public class CompositeVerifyQuery extends Query {
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return predFuncValues.boolVal(indexQueryScorer.docID());
|
return predFuncValues.boolVal(indexQueryScorer.docID());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 100; // TODO: use cost of predFuncValues.boolVal()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
|
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
|
||||||
|
|
|
@ -130,6 +130,11 @@ public class IntersectsRPTVerifyQuery extends Query {
|
||||||
|
|
||||||
return predFuncValues.boolVal(doc);
|
return predFuncValues.boolVal(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 100; // TODO: use cost of exactIterator.advance() and predFuncValues.boolVal()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
|
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
|
||||||
|
|
|
@ -195,6 +195,19 @@ public class AssertingScorer extends Scorer {
|
||||||
}
|
}
|
||||||
return matches;
|
return matches;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
float matchCost = in.matchCost();
|
||||||
|
assert ! Float.isNaN(matchCost);
|
||||||
|
assert matchCost >= 0;
|
||||||
|
return matchCost;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "AssertingScorer@asTwoPhaseIterator(" + in + ")";
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,16 +1,5 @@
|
||||||
package org.apache.lucene.search;
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Random;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -28,6 +17,16 @@ import org.apache.lucene.util.Bits;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A {@link Query} that adds random approximations to its scorers.
|
* A {@link Query} that adds random approximations to its scorers.
|
||||||
*/
|
*/
|
||||||
|
@ -172,10 +171,12 @@ public class RandomApproximationQuery extends Query {
|
||||||
|
|
||||||
private final DocIdSetIterator disi;
|
private final DocIdSetIterator disi;
|
||||||
private int lastDoc = -1;
|
private int lastDoc = -1;
|
||||||
|
private final float randomMatchCost;
|
||||||
|
|
||||||
RandomTwoPhaseView(Random random, DocIdSetIterator disi) {
|
RandomTwoPhaseView(Random random, DocIdSetIterator disi) {
|
||||||
super(new RandomApproximation(random, disi));
|
super(new RandomApproximation(random, disi));
|
||||||
this.disi = disi;
|
this.disi = disi;
|
||||||
|
this.randomMatchCost = random.nextFloat() * 200; // between 0 and 200
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -190,6 +191,10 @@ public class RandomApproximationQuery extends Query {
|
||||||
return approximation.docID() == disi.docID();
|
return approximation.docID() == disi.docID();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return randomMatchCost;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class RandomApproximation extends DocIdSetIterator {
|
private static class RandomApproximation extends DocIdSetIterator {
|
||||||
|
|
|
@ -190,6 +190,14 @@ class AssertingSpans extends Spans {
|
||||||
return in.cost();
|
return in.cost();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float positionsCost() {
|
||||||
|
float cost = in.positionsCost();
|
||||||
|
assert ! Float.isNaN(cost) : "positionsCost() should not be NaN";
|
||||||
|
assert cost > 0 : "positionsCost() must be positive";
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected float scoreCurrentDoc() throws IOException {
|
protected float scoreCurrentDoc() throws IOException {
|
||||||
assert in.docScorer != null : in.getClass() + " has no docScorer!";
|
assert in.docScorer != null : in.getClass() + " has no docScorer!";
|
||||||
|
@ -229,6 +237,18 @@ class AssertingSpans extends Spans {
|
||||||
}
|
}
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
float cost = in.matchCost();
|
||||||
|
if (Float.isNaN(cost)) {
|
||||||
|
throw new AssertionError("matchCost()=" + cost + " should not be NaN on doc ID " + approximation.docID());
|
||||||
|
}
|
||||||
|
if (cost < 0) {
|
||||||
|
throw new AssertionError("matchCost()=" + cost + " should be non negative on doc ID " + approximation.docID());
|
||||||
|
}
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class AssertingDISI extends DocIdSetIterator {
|
class AssertingDISI extends DocIdSetIterator {
|
||||||
|
|
|
@ -129,6 +129,11 @@ public abstract class Filter extends Query {
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return bits.get(approximation.docID());
|
return bits.get(approximation.docID());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return 10; // TODO: use cost of bits.get()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
return new ConstantScoreScorer(this, 0f, twoPhase);
|
return new ConstantScoreScorer(this, 0f, twoPhase);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue