LUCENE-8058: Large instances of TermInSetQuery are no longer eligible for caching as they could break memory accounting of the query cache.

This commit is contained in:
Adrien Grand 2017-11-29 09:29:52 +01:00
parent 70767b109f
commit 6d34f23263
6 changed files with 28 additions and 32 deletions

View File

@ -123,6 +123,10 @@ Optimizations
(Dawid Weiss, Robert Muir, Mike McCandless)
* LUCENE-8062: GlobalOrdinalsQuery is no longer eligible for caching. (Jim Ferenczi)
* LUCENE-8058: Large instances of TermInSetQuery are no longer eligible for
caching as they could break memory accounting of the query cache.
(Adrien Grand)
Tests

View File

@ -301,6 +301,12 @@ final class BooleanWeight extends Weight {
@Override
public boolean isCacheable(LeafReaderContext ctx) {
if (weights.size() > TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD) {
// Disallow caching large boolean queries to not encourage users
// to build large boolean queries as a workaround to the fact that
// we disallow caching large TermInSetQueries.
return false;
}
for (Weight w : weights) {
if (w.isCacheable(ctx) == false)
return false;

View File

@ -139,6 +139,12 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
@Override
public boolean isCacheable(LeafReaderContext ctx) {
if (weights.size() > TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD) {
// Disallow caching large dismax queries to not encourage users
// to build large dismax queries as a workaround to the fact that
// we disallow caching large TermInSetQueries.
return false;
}
for (Weight w : weights) {
if (w.isCacheable(ctx) == false)
return false;

View File

@ -87,8 +87,9 @@ import org.apache.lucene.util.RoaringDocIdSet;
*/
public class LRUQueryCache implements QueryCache, Accountable {
// memory usage of a simple term query
static final long QUERY_DEFAULT_RAM_BYTES_USED = 192;
// approximate memory usage that we assign to all queries
// this maps roughly to a BooleanQuery with a couple term clauses
static final long QUERY_DEFAULT_RAM_BYTES_USED = 1024;
static final long HASHTABLE_RAM_BYTES_PER_ENTRY =
2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // key + value
@ -298,7 +299,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
try {
Query singleton = uniqueQueries.putIfAbsent(query, query);
if (singleton == null) {
onQueryCache(query, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(query));
onQueryCache(query, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + QUERY_DEFAULT_RAM_BYTES_USED);
} else {
query = singleton;
}
@ -381,7 +382,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
private void onEviction(Query singleton) {
assert lock.isHeldByCurrentThread();
onQueryEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(singleton));
onQueryEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + QUERY_DEFAULT_RAM_BYTES_USED);
for (LeafCache leafCache : cache.values()) {
leafCache.remove(singleton);
}
@ -421,9 +422,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
long recomputedRamBytesUsed =
HASHTABLE_RAM_BYTES_PER_ENTRY * cache.size()
+ LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY * uniqueQueries.size();
for (Query query : mostRecentlyUsedQueries) {
recomputedRamBytesUsed += ramBytesUsed(query);
}
recomputedRamBytesUsed += mostRecentlyUsedQueries.size() * QUERY_DEFAULT_RAM_BYTES_USED;
for (LeafCache leafCache : cache.values()) {
recomputedRamBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY * leafCache.cache.size();
for (DocIdSet set : leafCache.cache.values()) {
@ -481,18 +480,6 @@ public class LRUQueryCache implements QueryCache, Accountable {
}
}
/**
* Return the number of bytes used by the given query. The default
* implementation returns {@link Accountable#ramBytesUsed()} if the query
* implements {@link Accountable} and <code>1024</code> otherwise.
*/
protected long ramBytesUsed(Query query) {
if (query instanceof Accountable) {
return ((Accountable) query).ramBytesUsed();
}
return QUERY_DEFAULT_RAM_BYTES_USED;
}
/**
* Default cache implementation: uses {@link RoaringDocIdSet} for sets that
* have a density &lt; 1% and a {@link BitDocIdSet} over a {@link FixedBitSet}

View File

@ -318,7 +318,9 @@ public class TermInSetQuery extends Query implements Accountable {
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
// Only cache instances that have a reasonable size. Otherwise it might cause memory issues
// with the query cache if most memory ends up being spent on queries rather than doc id sets.
return ramBytesUsed() <= LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
}
};

View File

@ -77,15 +77,6 @@ public class TestLRUQueryCache extends LuceneTestCase {
};
public void testFilterRamBytesUsed() {
final Query simpleQuery = new TermQuery(new Term("some_field", "some_term"));
final long actualRamBytesUsed = RamUsageTester.sizeOf(simpleQuery);
final long ramBytesUsed = LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
// we cannot assert exactly that the constant is correct since actual
// memory usage depends on JVM implementations and settings (eg. UseCompressedOops)
assertEquals(actualRamBytesUsed, ramBytesUsed, actualRamBytesUsed / 2);
}
public void testConcurrency() throws Throwable {
final LRUQueryCache queryCache = new LRUQueryCache(1 + random().nextInt(20), 1 + random().nextInt(10000), context -> random().nextBoolean());
Directory dir = newDirectory();
@ -282,7 +273,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
return ((DocIdSet) o).ramBytesUsed();
}
if (o instanceof Query) {
return queryCache.ramBytesUsed((Query) o);
return LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
}
if (o instanceof IndexReader || o.getClass().getSimpleName().equals("SegmentCoreReaders")) {
// do not take readers or core cache keys into account
@ -403,7 +394,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
return ((DocIdSet) o).ramBytesUsed();
}
if (o instanceof Query) {
return queryCache.ramBytesUsed((Query) o);
return LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
}
if (o.getClass().getSimpleName().equals("SegmentCoreReaders")) {
// do not follow references to core cache keys
@ -1498,7 +1489,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
IndexSearcher searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
LRUQueryCache cache = new LRUQueryCache(1, 1000, context -> true);
LRUQueryCache cache = new LRUQueryCache(1, 10000, context -> true);
searcher.setQueryCache(cache);
DVCacheQuery query = new DVCacheQuery("field");