mirror of https://github.com/apache/lucene.git
LUCENE-6198: add approximation constructor to TwoPhaseIterator
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1669161 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d1c5ff0153
commit
220a0ea642
|
@ -114,7 +114,7 @@ Optimizations
|
||||||
* LUCENE-6198: Added the TwoPhaseIterator API, exposed on scorers which
|
* LUCENE-6198: Added the TwoPhaseIterator API, exposed on scorers which
|
||||||
is for now only used on phrase queries and conjunctions in order to check
|
is for now only used on phrase queries and conjunctions in order to check
|
||||||
positions lazily if the phrase query is in a conjunction with other queries.
|
positions lazily if the phrase query is in a conjunction with other queries.
|
||||||
(Robert Muir, Adrien Grand)
|
(Robert Muir, Adrien Grand, David Smiley)
|
||||||
|
|
||||||
* LUCENE-6244, LUCENE-6251: All boolean queries but those that have a
|
* LUCENE-6244, LUCENE-6251: All boolean queries but those that have a
|
||||||
minShouldMatch > 1 now either propagate or take advantage of the two-phase
|
minShouldMatch > 1 now either propagate or take advantage of the two-phase
|
||||||
|
|
|
@ -151,7 +151,7 @@ public class CachingWrapperQuery extends Query implements Accountable {
|
||||||
twoPhaseView = null;
|
twoPhaseView = null;
|
||||||
disi = approximation;
|
disi = approximation;
|
||||||
} else {
|
} else {
|
||||||
twoPhaseView = new TwoPhaseIterator() {
|
twoPhaseView = new TwoPhaseIterator(approximation) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
|
@ -159,10 +159,6 @@ public class CachingWrapperQuery extends Query implements Accountable {
|
||||||
return acceptDocs.get(doc);
|
return acceptDocs.get(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return approximation;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseView);
|
disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseView);
|
||||||
}
|
}
|
||||||
|
|
|
@ -140,18 +140,12 @@ class ConjunctionDISI extends DocIdSetIterator {
|
||||||
*/
|
*/
|
||||||
private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {
|
private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {
|
||||||
|
|
||||||
private final ConjunctionDISI approximation;
|
|
||||||
private final TwoPhaseIterator[] twoPhaseIterators;
|
private final TwoPhaseIterator[] twoPhaseIterators;
|
||||||
|
|
||||||
private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
|
private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
|
||||||
approximation = new ConjunctionDISI(iterators);
|
super(new ConjunctionDISI(iterators));
|
||||||
assert twoPhaseIterators.size() > 0;
|
assert twoPhaseIterators.size() > 0;
|
||||||
this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[0]);
|
this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return approximation;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -119,15 +119,10 @@ abstract class DisjunctionScorer extends Scorer {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return new TwoPhaseIterator() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
// note it is important to share the same pq as this scorer so that
|
// note it is important to share the same pq as this scorer so that
|
||||||
// rebalancing the pq through the approximation will also rebalance
|
// rebalancing the pq through the approximation will also rebalance
|
||||||
// the pq in this scorer.
|
// the pq in this scorer.
|
||||||
return new DisjunctionDISIApproximation(subScorers);
|
return new TwoPhaseIterator(new DisjunctionDISIApproximation(subScorers)) {
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
|
|
|
@ -213,24 +213,18 @@ public final class DocValuesRangeQuery extends Query {
|
||||||
|
|
||||||
private static class TwoPhaseNumericRange extends TwoPhaseIterator {
|
private static class TwoPhaseNumericRange extends TwoPhaseIterator {
|
||||||
|
|
||||||
private final DocIdSetIterator approximation;
|
|
||||||
private final SortedNumericDocValues values;
|
private final SortedNumericDocValues values;
|
||||||
private final long min, max;
|
private final long min, max;
|
||||||
private final Bits acceptDocs;
|
private final Bits acceptDocs;
|
||||||
|
|
||||||
TwoPhaseNumericRange(SortedNumericDocValues values, long min, long max, DocIdSetIterator approximation, Bits acceptDocs) {
|
TwoPhaseNumericRange(SortedNumericDocValues values, long min, long max, DocIdSetIterator approximation, Bits acceptDocs) {
|
||||||
|
super(approximation);
|
||||||
this.values = values;
|
this.values = values;
|
||||||
this.min = min;
|
this.min = min;
|
||||||
this.max = max;
|
this.max = max;
|
||||||
this.approximation = approximation;
|
|
||||||
this.acceptDocs = acceptDocs;
|
this.acceptDocs = acceptDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return approximation;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
final int doc = approximation.docID();
|
final int doc = approximation.docID();
|
||||||
|
@ -251,24 +245,18 @@ public final class DocValuesRangeQuery extends Query {
|
||||||
|
|
||||||
private static class TwoPhaseOrdRange extends TwoPhaseIterator {
|
private static class TwoPhaseOrdRange extends TwoPhaseIterator {
|
||||||
|
|
||||||
private final DocIdSetIterator approximation;
|
|
||||||
private final SortedSetDocValues values;
|
private final SortedSetDocValues values;
|
||||||
private final long minOrd, maxOrd;
|
private final long minOrd, maxOrd;
|
||||||
private final Bits acceptDocs;
|
private final Bits acceptDocs;
|
||||||
|
|
||||||
TwoPhaseOrdRange(SortedSetDocValues values, long minOrd, long maxOrd, DocIdSetIterator approximation, Bits acceptDocs) {
|
TwoPhaseOrdRange(SortedSetDocValues values, long minOrd, long maxOrd, DocIdSetIterator approximation, Bits acceptDocs) {
|
||||||
|
super(approximation);
|
||||||
this.values = values;
|
this.values = values;
|
||||||
this.minOrd = minOrd;
|
this.minOrd = minOrd;
|
||||||
this.maxOrd = maxOrd;
|
this.maxOrd = maxOrd;
|
||||||
this.approximation = approximation;
|
|
||||||
this.acceptDocs = acceptDocs;
|
this.acceptDocs = acceptDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return approximation;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
final int doc = approximation.docID();
|
final int doc = approximation.docID();
|
||||||
|
|
|
@ -150,11 +150,7 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
|
||||||
} while (termsEnum.next() != null);
|
} while (termsEnum.next() != null);
|
||||||
|
|
||||||
final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
|
final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
|
||||||
final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator() {
|
final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return approximation;
|
|
||||||
}
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
final int doc = approximation.docID();
|
final int doc = approximation.docID();
|
||||||
|
|
|
@ -160,11 +160,7 @@ public class DocValuesTermsQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
|
final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
|
||||||
final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator() {
|
final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return approximation;
|
|
||||||
}
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
final int doc = approximation.docID();
|
final int doc = approximation.docID();
|
||||||
|
|
|
@ -63,17 +63,11 @@ final class ExactPhraseScorer extends Scorer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
return new TwoPhaseIterator() {
|
return new TwoPhaseIterator(conjunction) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return phraseFreq() > 0;
|
return phraseFreq() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return conjunction;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -71,7 +71,7 @@ public final class FieldValueQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
|
final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
|
||||||
final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator() {
|
final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
|
@ -85,10 +85,6 @@ public final class FieldValueQuery extends Query {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return approximation;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
final DocIdSetIterator disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
|
final DocIdSetIterator disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
|
||||||
|
|
||||||
|
|
|
@ -216,11 +216,7 @@ public class FilteredQuery extends Query {
|
||||||
TwoPhaseIterator inner = scorer.asTwoPhaseIterator();
|
TwoPhaseIterator inner = scorer.asTwoPhaseIterator();
|
||||||
if (inner != null) {
|
if (inner != null) {
|
||||||
// we are like a simplified conjunction here, handle the nested case:
|
// we are like a simplified conjunction here, handle the nested case:
|
||||||
return new TwoPhaseIterator() {
|
return new TwoPhaseIterator(inner.approximation()) {
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return inner.approximation();
|
|
||||||
}
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
// check the approximation matches first, then check bits last.
|
// check the approximation matches first, then check bits last.
|
||||||
|
@ -229,12 +225,7 @@ public class FilteredQuery extends Query {
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
// scorer doesn't have an approximation, just use it, to force bits applied last.
|
// scorer doesn't have an approximation, just use it, to force bits applied last.
|
||||||
return new TwoPhaseIterator() {
|
return new TwoPhaseIterator(scorer) {
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return scorer;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
return filterBits.get(scorer.docID());
|
return filterBits.get(scorer.docID());
|
||||||
|
|
|
@ -570,18 +570,12 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
||||||
twoPhaseView = null;
|
twoPhaseView = null;
|
||||||
disi = approximation;
|
disi = approximation;
|
||||||
} else {
|
} else {
|
||||||
twoPhaseView = new TwoPhaseIterator() {
|
twoPhaseView = new TwoPhaseIterator(approximation) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
final int doc = approximation.docID();
|
final int doc = approximation.docID();
|
||||||
return acceptDocs.get(doc);
|
return acceptDocs.get(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return approximation;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseView);
|
disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseView);
|
||||||
}
|
}
|
||||||
|
|
|
@ -136,12 +136,7 @@ class ReqExclScorer extends Scorer {
|
||||||
if (reqTwoPhaseIterator == null) {
|
if (reqTwoPhaseIterator == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return new TwoPhaseIterator() {
|
return new TwoPhaseIterator(reqApproximation) {
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return reqApproximation;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
|
|
|
@ -590,12 +590,7 @@ final class SloppyPhraseScorer extends Scorer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||||
return new TwoPhaseIterator() {
|
return new TwoPhaseIterator(conjunction) {
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return conjunction;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
sloppyFreq = phraseFreq(); // check for phrase
|
sloppyFreq = phraseFreq(); // check for phrase
|
||||||
|
|
|
@ -18,9 +18,11 @@ package org.apache.lucene.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An approximation of a {@link DocIdSetIterator}. When the {@link #approximation()}'s
|
* Returned by {@link Scorer#asTwoPhaseIterator()} to expose an approximation of
|
||||||
|
* a {@link DocIdSetIterator}. When the {@link #approximation()}'s
|
||||||
* {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
|
* {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
|
||||||
* return, {@link #matches()} needs to be checked in order to know whether the
|
* return, {@link #matches()} needs to be checked in order to know whether the
|
||||||
* returned doc ID actually matches.
|
* returned doc ID actually matches.
|
||||||
|
@ -28,6 +30,13 @@ import java.io.IOException;
|
||||||
*/
|
*/
|
||||||
public abstract class TwoPhaseIterator {
|
public abstract class TwoPhaseIterator {
|
||||||
|
|
||||||
|
protected final DocIdSetIterator approximation;
|
||||||
|
|
||||||
|
/** Takes the approximation to be returned by {@link #approximation()}. Not null. */
|
||||||
|
protected TwoPhaseIterator(DocIdSetIterator approximation) {
|
||||||
|
this.approximation = Objects.requireNonNull(approximation);
|
||||||
|
}
|
||||||
|
|
||||||
/** Return a {@link DocIdSetIterator} view of the provided
|
/** Return a {@link DocIdSetIterator} view of the provided
|
||||||
* {@link TwoPhaseIterator}. */
|
* {@link TwoPhaseIterator}. */
|
||||||
public static DocIdSetIterator asDocIdSetIterator(TwoPhaseIterator twoPhaseIterator) {
|
public static DocIdSetIterator asDocIdSetIterator(TwoPhaseIterator twoPhaseIterator) {
|
||||||
|
@ -70,7 +79,9 @@ public abstract class TwoPhaseIterator {
|
||||||
/** Return an approximation. The returned {@link DocIdSetIterator} is a
|
/** Return an approximation. The returned {@link DocIdSetIterator} is a
|
||||||
* superset of the matching documents, and each match needs to be confirmed
|
* superset of the matching documents, and each match needs to be confirmed
|
||||||
* with {@link #matches()} in order to know whether it matches or not. */
|
* with {@link #matches()} in order to know whether it matches or not. */
|
||||||
public abstract DocIdSetIterator approximation();
|
public DocIdSetIterator approximation() {
|
||||||
|
return approximation;
|
||||||
|
}
|
||||||
|
|
||||||
/** Return whether the current doc ID that the iterator is on matches. This
|
/** Return whether the current doc ID that the iterator is on matches. This
|
||||||
* method should only be called when the iterator is positioned -- i.e. not
|
* method should only be called when the iterator is positioned -- i.e. not
|
||||||
|
|
|
@ -28,12 +28,7 @@ import org.apache.lucene.util.TestUtil;
|
||||||
public class TestConjunctionDISI extends LuceneTestCase {
|
public class TestConjunctionDISI extends LuceneTestCase {
|
||||||
|
|
||||||
private static TwoPhaseIterator approximation(final DocIdSetIterator iterator, final FixedBitSet confirmed) {
|
private static TwoPhaseIterator approximation(final DocIdSetIterator iterator, final FixedBitSet confirmed) {
|
||||||
return new TwoPhaseIterator() {
|
return new TwoPhaseIterator(iterator) {
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return iterator;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
|
|
|
@ -166,26 +166,20 @@ public class RandomApproximationQuery extends Query {
|
||||||
private static class RandomTwoPhaseView extends TwoPhaseIterator {
|
private static class RandomTwoPhaseView extends TwoPhaseIterator {
|
||||||
|
|
||||||
private final DocIdSetIterator disi;
|
private final DocIdSetIterator disi;
|
||||||
private final RandomApproximation approximation;
|
|
||||||
private int lastDoc = -1;
|
private int lastDoc = -1;
|
||||||
|
|
||||||
RandomTwoPhaseView(Random random, DocIdSetIterator disi) {
|
RandomTwoPhaseView(Random random, DocIdSetIterator disi) {
|
||||||
|
super(new RandomApproximation(random, disi));
|
||||||
this.disi = disi;
|
this.disi = disi;
|
||||||
this.approximation = new RandomApproximation(random, disi);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator approximation() {
|
|
||||||
return approximation;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean matches() throws IOException {
|
public boolean matches() throws IOException {
|
||||||
if (approximation.docID() == -1 || approximation.docID() == DocIdSetIterator.NO_MORE_DOCS) {
|
if (approximation.docID() == -1 || approximation.docID() == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
throw new AssertionError("matches() should not be called on doc ID " + approximation.doc);
|
throw new AssertionError("matches() should not be called on doc ID " + approximation.docID());
|
||||||
}
|
}
|
||||||
if (lastDoc == approximation.docID()) {
|
if (lastDoc == approximation.docID()) {
|
||||||
throw new AssertionError("matches() has been called twice on doc ID " + approximation.doc);
|
throw new AssertionError("matches() has been called twice on doc ID " + approximation.docID());
|
||||||
}
|
}
|
||||||
lastDoc = approximation.docID();
|
lastDoc = approximation.docID();
|
||||||
return approximation.docID() == disi.docID();
|
return approximation.docID() == disi.docID();
|
||||||
|
|
Loading…
Reference in New Issue