LUCENE-5979: Use the cost API to decide on whether to use random-access to intersect queries and filters.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1629598 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2014-10-06 09:19:59 +00:00
parent 74ca91be6c
commit 1a834a153a
4 changed files with 18 additions and 41 deletions

View File

@ -1250,6 +1250,10 @@ New Features
approximate value of the diameter of the earth at the given latitude.
(Adrien Grand)
* LUCENE-5979: FilteredQuery uses the cost API to decide on whether to use
random-access or leap-frog to intersect the filter with the query.
(Adrien Grand)
Build
* LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;

View File

@ -254,12 +254,12 @@ public class FilteredQuery extends Query {
* jumping past the target document. When both land on the same document, it's
* collected.
*/
private static class LeapFrogScorer extends Scorer {
private static final class LeapFrogScorer extends Scorer {
private final DocIdSetIterator secondary;
private final DocIdSetIterator primary;
private final Scorer scorer;
protected int primaryDoc = -1;
protected int secondaryDoc = -1;
private int primaryDoc = -1;
private int secondaryDoc = -1;
protected LeapFrogScorer(Weight weight, DocIdSetIterator primary, DocIdSetIterator secondary, Scorer scorer) {
super(weight);
@ -324,26 +324,6 @@ public class FilteredQuery extends Query {
}
}
// TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer
/**
* A {@link LeapFrogScorer} variant whose primary iterator (the filter) is
* treated as already sitting on its first matching document.
* The constructor seeds {@code primaryDoc} with that document, and the first
* {@link #primaryNext()} call hands it back instead of delegating to the
* superclass.
*/
private static final class PrimaryAdvancedLeapFrogScorer extends LeapFrogScorer {
// first document accepted by the filter; returned by the first primaryNext() call
private final int firstFilteredDoc;
/**
* @param weight the weight passed through to the superclass
* @param firstFilteredDoc the document the filter iterator is assumed to be positioned on
* @param filterIter the filter iterator, used as the primary iterator
* @param other the query scorer, passed to the superclass as both the secondary iterator and the scorer
*/
protected PrimaryAdvancedLeapFrogScorer(Weight weight, int firstFilteredDoc, DocIdSetIterator filterIter, Scorer other) {
super(weight, filterIter, other, other);
this.firstFilteredDoc = firstFilteredDoc;
this.primaryDoc = firstFilteredDoc; // initialize to prevent an advance call from moving it further
}
/**
* Returns {@code firstFilteredDoc} while {@code secondaryDoc} is still -1;
* once {@code secondaryDoc} has changed, defers to the superclass implementation.
*/
@Override
protected int primaryNext() throws IOException {
if (secondaryDoc != -1) {
return super.primaryNext();
} else {
// secondary has not moved yet: report the seeded first filter document
return firstFilteredDoc;
}
}
}
/** Rewrites the query. If the wrapped is an instance of
* {@link MatchAllDocsQuery} it returns a {@link ConstantScoreQuery}. Otherwise
* it returns a new {@code FilteredQuery} wrapping the rewritten query. */
@ -421,7 +401,7 @@ public class FilteredQuery extends Query {
* A {@link FilterStrategy} that conditionally uses a random access filter if
* the given {@link DocIdSet} supports random access (returns a non-null value
* from {@link DocIdSet#bits()}) and
* {@link RandomAccessFilterStrategy#useRandomAccess(Bits, int)} returns
* {@link RandomAccessFilterStrategy#useRandomAccess(Bits, long)} returns
* <code>true</code>. Otherwise this strategy falls back to a "zig-zag join" (
* {@link FilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy.
*
@ -515,7 +495,7 @@ public class FilteredQuery extends Query {
* A {@link FilterStrategy} that conditionally uses a random access filter if
* the given {@link DocIdSet} supports random access (returns a non-null value
* from {@link DocIdSet#bits()}) and
* {@link RandomAccessFilterStrategy#useRandomAccess(Bits, int)} returns
* {@link RandomAccessFilterStrategy#useRandomAccess(Bits, long)} returns
* <code>true</code>. Otherwise this strategy falls back to a "zig-zag join" (
* {@link FilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy.
*/
@ -528,25 +508,18 @@ public class FilteredQuery extends Query {
// this means the filter does not accept any documents.
return null;
}
final int firstFilterDoc = filterIter.nextDoc();
if (firstFilterDoc == DocIdSetIterator.NO_MORE_DOCS) {
return null;
}
final Bits filterAcceptDocs = docIdSet.bits();
// force if RA is requested
final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc);
final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, filterIter.cost());
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
return weight.scorer(context, filterAcceptDocs);
} else {
assert firstFilterDoc > -1;
// we are going to advance() this scorer, so we set inorder=true/toplevel=false
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
final Scorer scorer = weight.scorer(context, null);
// TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer
return (scorer == null) ? null : new PrimaryAdvancedLeapFrogScorer(weight, firstFilterDoc, filterIter, scorer);
return (scorer == null) ? null : new LeapFrogScorer(weight, filterIter, scorer, scorer);
}
}
@ -557,14 +530,14 @@ public class FilteredQuery extends Query {
* However, when the filter is very sparse, it can be faster to execute the query+filter
* as a conjunction in some cases.
*
* The default implementation returns <code>true</code> if the first document accepted by the
* filter is < 100.
* The default implementation returns <code>true</code> if the filter matches more than 1%
* of documents.
*
* @lucene.internal
*/
protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
//TODO once we have a cost API on filters and scorers we should rethink this heuristic
return firstFilterDoc < 100;
protected boolean useRandomAccess(Bits bits, long filterCost) {
// if the filter matches more than 1% of documents, we use random-access
return filterCost * 100 > bits.length();
}
}

View File

@ -387,7 +387,7 @@ public class TestFilteredQuery extends LuceneTestCase {
if (useRandomAccess) {
return new FilteredQuery.RandomAccessFilterStrategy() {
@Override
protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
protected boolean useRandomAccess(Bits bits, long filterCost) {
return true;
}
};

View File

@ -1025,7 +1025,7 @@ public final class TestUtil {
case 4:
return new FilteredQuery.RandomAccessFilterStrategy() {
@Override
protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
protected boolean useRandomAccess(Bits bits, long filterCost) {
return LuceneTestCase.random().nextBoolean();
}
};