mirror of https://github.com/apache/lucene.git
LUCENE-7680: Never cache term filters.
This commit is contained in:
parent
c2f061d7cb
commit
063954ce79
|
@ -145,6 +145,10 @@ Improvements
|
||||||
of the less descriptive FileNotFound or NoSuchFileException (Mike Drob via
|
of the less descriptive FileNotFound or NoSuchFileException (Mike Drob via
|
||||||
Mike McCandless, Erick Erickson)
|
Mike McCandless, Erick Erickson)
|
||||||
|
|
||||||
|
* LUCENE-7680: UsageTrackingQueryCachingPolicy never caches term filters anymore
|
||||||
|
since they are plenty fast. This also has the side-effect of leaving more
|
||||||
|
space in the history for costly filters. (Adrien Grand)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-7641: Optimized point range queries to compute documents that do not
|
* LUCENE-7641: Optimized point range queries to compute documents that do not
|
||||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.util.FrequencyTrackingRingBuffer;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy {
|
public class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy {
|
||||||
|
|
||||||
// the hash code that we use as a sentinel in the ring buffer.
|
// the hash code that we use as a sentinel in the ring buffer.
|
||||||
private static final int SENTINEL = Integer.MIN_VALUE;
|
private static final int SENTINEL = Integer.MIN_VALUE;
|
||||||
|
@ -54,16 +54,49 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
|
||||||
isPointQuery(query);
|
isPointQuery(query);
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean isCheap(Query query) {
|
private static boolean shouldNeverCache(Query query) {
|
||||||
// same for cheap queries
|
if (query instanceof TermQuery) {
|
||||||
// these queries are so cheap that they usually do not need caching
|
// We do not bother caching term queries since they are already plenty fast.
|
||||||
return query instanceof TermQuery;
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query instanceof MatchAllDocsQuery) {
|
||||||
|
// MatchAllDocsQuery has an iterator that is faster than what a bit set could do.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For the below queries, it's cheap to notice they cannot match any docs so
|
||||||
|
// we do not bother caching them.
|
||||||
|
if (query instanceof MatchNoDocsQuery) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query instanceof BooleanQuery) {
|
||||||
|
BooleanQuery bq = (BooleanQuery) query;
|
||||||
|
if (bq.clauses().isEmpty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query instanceof DisjunctionMaxQuery) {
|
||||||
|
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
|
||||||
|
if (dmq.getDisjuncts().isEmpty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final FrequencyTrackingRingBuffer recentlyUsedFilters;
|
private final FrequencyTrackingRingBuffer recentlyUsedFilters;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new instance.
|
* Expert: Create a new instance with a configurable history size. Beware of
|
||||||
|
* passing too large values for the size of the history, either
|
||||||
|
* {@link #minFrequencyToCache} returns low values and this means some filters
|
||||||
|
* that are rarely used will be cached, which would hurt performance. Or
|
||||||
|
* {@link #minFrequencyToCache} returns high values that are function of the
|
||||||
|
* size of the history but then filters will be slow to make it to the cache.
|
||||||
*
|
*
|
||||||
* @param historySize the number of recently used filters to track
|
* @param historySize the number of recently used filters to track
|
||||||
*/
|
*/
|
||||||
|
@ -71,20 +104,22 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
|
||||||
this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL);
|
this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Create a new instance with an history size of 256. */
|
/** Create a new instance with an history size of 256. This should be a good
|
||||||
|
* default for most cases. */
|
||||||
public UsageTrackingQueryCachingPolicy() {
|
public UsageTrackingQueryCachingPolicy() {
|
||||||
this(256);
|
this(256);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For a given query, return how many times it should appear in the history
|
* For a given filter, return how many times it should appear in the history
|
||||||
* before being cached.
|
* before being cached. The default implementation returns 2 for filters that
|
||||||
|
* need to evaluate against the entire index to build a {@link DocIdSetIterator},
|
||||||
|
* like {@link MultiTermQuery}, point-based queries or {@link TermInSetQuery},
|
||||||
|
* and 5 for other filters.
|
||||||
*/
|
*/
|
||||||
protected int minFrequencyToCache(Query query) {
|
protected int minFrequencyToCache(Query query) {
|
||||||
if (isCostly(query)) {
|
if (isCostly(query)) {
|
||||||
return 2;
|
return 2;
|
||||||
} else if (isCheap(query)) {
|
|
||||||
return 20;
|
|
||||||
} else {
|
} else {
|
||||||
// default: cache after the filter has been seen 5 times
|
// default: cache after the filter has been seen 5 times
|
||||||
return 5;
|
return 5;
|
||||||
|
@ -96,6 +131,10 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
|
||||||
assert query instanceof BoostQuery == false;
|
assert query instanceof BoostQuery == false;
|
||||||
assert query instanceof ConstantScoreQuery == false;
|
assert query instanceof ConstantScoreQuery == false;
|
||||||
|
|
||||||
|
if (shouldNeverCache(query)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// call hashCode outside of sync block
|
// call hashCode outside of sync block
|
||||||
// in case it's somewhat expensive:
|
// in case it's somewhat expensive:
|
||||||
int hashCode = query.hashCode();
|
int hashCode = query.hashCode();
|
||||||
|
@ -123,24 +162,9 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean shouldCache(Query query) throws IOException {
|
public boolean shouldCache(Query query) throws IOException {
|
||||||
if (query instanceof MatchAllDocsQuery
|
if (shouldNeverCache(query)) {
|
||||||
// MatchNoDocsQuery currently rewrites to a BooleanQuery,
|
|
||||||
// but who knows, it might get its own Weight one day
|
|
||||||
|| query instanceof MatchNoDocsQuery) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (query instanceof BooleanQuery) {
|
|
||||||
BooleanQuery bq = (BooleanQuery) query;
|
|
||||||
if (bq.clauses().isEmpty()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (query instanceof DisjunctionMaxQuery) {
|
|
||||||
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
|
|
||||||
if (dmq.getDisjuncts().isEmpty()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
final int frequency = frequency(query);
|
final int frequency = frequency(query);
|
||||||
final int minFrequency = minFrequencyToCache(query);
|
final int minFrequency = minFrequencyToCache(query);
|
||||||
return frequency >= minFrequency;
|
return frequency >= minFrequency;
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.search;
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -37,4 +39,13 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
|
||||||
assertFalse(policy.shouldCache(q));
|
assertFalse(policy.shouldCache(q));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testNeverCacheTermFilter() throws IOException {
|
||||||
|
Query q = new TermQuery(new Term("foo", "bar"));
|
||||||
|
UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
|
||||||
|
for (int i = 0; i < 1000; ++i) {
|
||||||
|
policy.onUse(q);
|
||||||
|
}
|
||||||
|
assertFalse(policy.shouldCache(q));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue