LUCENE-7680: Never cache term filters.

This commit is contained in:
Adrien Grand 2017-02-16 09:40:31 +01:00
parent c2f061d7cb
commit 063954ce79
3 changed files with 66 additions and 27 deletions

View File

@ -145,6 +145,10 @@ Improvements
of the less descriptive FileNotFound or NoSuchFileException (Mike Drob via of the less descriptive FileNotFound or NoSuchFileException (Mike Drob via
Mike McCandless, Erick Erickson) Mike McCandless, Erick Erickson)
* LUCENE-7680: UsageTrackingQueryCachingPolicy never caches term filters anymore
since they are plenty fast. This also has the side-effect of leaving more
space in the history for costly filters. (Adrien Grand)
Optimizations Optimizations
* LUCENE-7641: Optimized point range queries to compute documents that do not * LUCENE-7641: Optimized point range queries to compute documents that do not

View File

@ -27,7 +27,7 @@ import org.apache.lucene.util.FrequencyTrackingRingBuffer;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy { public class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy {
// the hash code that we use as a sentinel in the ring buffer. // the hash code that we use as a sentinel in the ring buffer.
private static final int SENTINEL = Integer.MIN_VALUE; private static final int SENTINEL = Integer.MIN_VALUE;
@ -54,16 +54,49 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
isPointQuery(query); isPointQuery(query);
} }
static boolean isCheap(Query query) { private static boolean shouldNeverCache(Query query) {
// same for cheap queries if (query instanceof TermQuery) {
// these queries are so cheap that they usually do not need caching // We do not bother caching term queries since they are already plenty fast.
return query instanceof TermQuery; return true;
}
if (query instanceof MatchAllDocsQuery) {
// MatchAllDocsQuery has an iterator that is faster than what a bit set could do.
return true;
}
// For the below queries, it's cheap to notice they cannot match any docs so
// we do not bother caching them.
if (query instanceof MatchNoDocsQuery) {
return false;
}
if (query instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) query;
if (bq.clauses().isEmpty()) {
return true;
}
}
if (query instanceof DisjunctionMaxQuery) {
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
if (dmq.getDisjuncts().isEmpty()) {
return true;
}
}
return false;
} }
private final FrequencyTrackingRingBuffer recentlyUsedFilters; private final FrequencyTrackingRingBuffer recentlyUsedFilters;
/** /**
* Create a new instance. * Expert: Create a new instance with a configurable history size. Beware of
* passing too large values for the size of the history, either
* {@link #minFrequencyToCache} returns low values and this means some filters
* that are rarely used will be cached, which would hurt performance. Or
* {@link #minFrequencyToCache} returns high values that are function of the
* size of the history but then filters will be slow to make it to the cache.
* *
* @param historySize the number of recently used filters to track * @param historySize the number of recently used filters to track
*/ */
@ -71,20 +104,22 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL); this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL);
} }
/** Create a new instance with an history size of 256. */ /** Create a new instance with an history size of 256. This should be a good
* default for most cases. */
public UsageTrackingQueryCachingPolicy() { public UsageTrackingQueryCachingPolicy() {
this(256); this(256);
} }
/** /**
* For a given query, return how many times it should appear in the history * For a given filter, return how many times it should appear in the history
* before being cached. * before being cached. The default implementation returns 2 for filters that
* need to evaluate against the entire index to build a {@link DocIdSetIterator},
* like {@link MultiTermQuery}, point-based queries or {@link TermInSetQuery},
* and 5 for other filters.
*/ */
protected int minFrequencyToCache(Query query) { protected int minFrequencyToCache(Query query) {
if (isCostly(query)) { if (isCostly(query)) {
return 2; return 2;
} else if (isCheap(query)) {
return 20;
} else { } else {
// default: cache after the filter has been seen 5 times // default: cache after the filter has been seen 5 times
return 5; return 5;
@ -96,6 +131,10 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
assert query instanceof BoostQuery == false; assert query instanceof BoostQuery == false;
assert query instanceof ConstantScoreQuery == false; assert query instanceof ConstantScoreQuery == false;
if (shouldNeverCache(query)) {
return;
}
// call hashCode outside of sync block // call hashCode outside of sync block
// in case it's somewhat expensive: // in case it's somewhat expensive:
int hashCode = query.hashCode(); int hashCode = query.hashCode();
@ -123,24 +162,9 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
@Override @Override
public boolean shouldCache(Query query) throws IOException { public boolean shouldCache(Query query) throws IOException {
if (query instanceof MatchAllDocsQuery if (shouldNeverCache(query)) {
// MatchNoDocsQuery currently rewrites to a BooleanQuery,
// but who knows, it might get its own Weight one day
|| query instanceof MatchNoDocsQuery) {
return false; return false;
} }
if (query instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) query;
if (bq.clauses().isEmpty()) {
return false;
}
}
if (query instanceof DisjunctionMaxQuery) {
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
if (dmq.getDisjuncts().isEmpty()) {
return false;
}
}
final int frequency = frequency(query); final int frequency = frequency(query);
final int minFrequency = minFrequencyToCache(query); final int minFrequency = minFrequencyToCache(query);
return frequency >= minFrequency; return frequency >= minFrequency;

View File

@ -16,6 +16,8 @@
*/ */
package org.apache.lucene.search; package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
@ -37,4 +39,13 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
assertFalse(policy.shouldCache(q)); assertFalse(policy.shouldCache(q));
} }
public void testNeverCacheTermFilter() throws IOException {
Query q = new TermQuery(new Term("foo", "bar"));
UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
for (int i = 0; i < 1000; ++i) {
policy.onUse(q);
}
assertFalse(policy.shouldCache(q));
}
} }