mirror of https://github.com/apache/lucene.git
LUCENE-8058: Large instances of TermInSetQuery are no longer eligible for caching as they could break memory accounting of the query cache.
This commit is contained in:
parent
70767b109f
commit
6d34f23263
|
@ -123,6 +123,10 @@ Optimizations
|
|||
(Dawid Weiss, Robert Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-8062: GlobalOrdinalsQuery is no longer eligible for caching. (Jim Ferenczi)
|
||||
|
||||
* LUCENE-8058: Large instances of TermInSetQuery are no longer eligible for
|
||||
caching as they could break memory accounting of the query cache.
|
||||
(Adrien Grand)
|
||||
|
||||
Tests
|
||||
|
||||
|
|
|
@ -301,6 +301,12 @@ final class BooleanWeight extends Weight {
|
|||
|
||||
@Override
|
||||
public boolean isCacheable(LeafReaderContext ctx) {
|
||||
if (weights.size() > TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD) {
|
||||
// Disallow caching large boolean queries to not encourage users
|
||||
// to build large boolean queries as a workaround to the fact that
|
||||
// we disallow caching large TermInSetQueries.
|
||||
return false;
|
||||
}
|
||||
for (Weight w : weights) {
|
||||
if (w.isCacheable(ctx) == false)
|
||||
return false;
|
||||
|
|
|
@ -139,6 +139,12 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
|
||||
@Override
|
||||
public boolean isCacheable(LeafReaderContext ctx) {
|
||||
if (weights.size() > TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD) {
|
||||
// Disallow caching large dismax queries to not encourage users
|
||||
// to build large dismax queries as a workaround to the fact that
|
||||
// we disallow caching large TermInSetQueries.
|
||||
return false;
|
||||
}
|
||||
for (Weight w : weights) {
|
||||
if (w.isCacheable(ctx) == false)
|
||||
return false;
|
||||
|
|
|
@ -87,8 +87,9 @@ import org.apache.lucene.util.RoaringDocIdSet;
|
|||
*/
|
||||
public class LRUQueryCache implements QueryCache, Accountable {
|
||||
|
||||
// memory usage of a simple term query
|
||||
static final long QUERY_DEFAULT_RAM_BYTES_USED = 192;
|
||||
// approximate memory usage that we assign to all queries
|
||||
// this maps roughly to a BooleanQuery with a couple term clauses
|
||||
static final long QUERY_DEFAULT_RAM_BYTES_USED = 1024;
|
||||
|
||||
static final long HASHTABLE_RAM_BYTES_PER_ENTRY =
|
||||
2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // key + value
|
||||
|
@ -298,7 +299,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
|||
try {
|
||||
Query singleton = uniqueQueries.putIfAbsent(query, query);
|
||||
if (singleton == null) {
|
||||
onQueryCache(query, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(query));
|
||||
onQueryCache(query, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + QUERY_DEFAULT_RAM_BYTES_USED);
|
||||
} else {
|
||||
query = singleton;
|
||||
}
|
||||
|
@ -381,7 +382,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
|||
|
||||
private void onEviction(Query singleton) {
|
||||
assert lock.isHeldByCurrentThread();
|
||||
onQueryEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(singleton));
|
||||
onQueryEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + QUERY_DEFAULT_RAM_BYTES_USED);
|
||||
for (LeafCache leafCache : cache.values()) {
|
||||
leafCache.remove(singleton);
|
||||
}
|
||||
|
@ -421,9 +422,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
|||
long recomputedRamBytesUsed =
|
||||
HASHTABLE_RAM_BYTES_PER_ENTRY * cache.size()
|
||||
+ LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY * uniqueQueries.size();
|
||||
for (Query query : mostRecentlyUsedQueries) {
|
||||
recomputedRamBytesUsed += ramBytesUsed(query);
|
||||
}
|
||||
recomputedRamBytesUsed += mostRecentlyUsedQueries.size() * QUERY_DEFAULT_RAM_BYTES_USED;
|
||||
for (LeafCache leafCache : cache.values()) {
|
||||
recomputedRamBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY * leafCache.cache.size();
|
||||
for (DocIdSet set : leafCache.cache.values()) {
|
||||
|
@ -481,18 +480,6 @@ public class LRUQueryCache implements QueryCache, Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of bytes used by the given query. The default
|
||||
* implementation returns {@link Accountable#ramBytesUsed()} if the query
|
||||
* implements {@link Accountable} and <code>1024</code> otherwise.
|
||||
*/
|
||||
protected long ramBytesUsed(Query query) {
|
||||
if (query instanceof Accountable) {
|
||||
return ((Accountable) query).ramBytesUsed();
|
||||
}
|
||||
return QUERY_DEFAULT_RAM_BYTES_USED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default cache implementation: uses {@link RoaringDocIdSet} for sets that
|
||||
* have a density < 1% and a {@link BitDocIdSet} over a {@link FixedBitSet}
|
||||
|
|
|
@ -318,7 +318,9 @@ public class TermInSetQuery extends Query implements Accountable {
|
|||
|
||||
@Override
|
||||
public boolean isCacheable(LeafReaderContext ctx) {
|
||||
return true;
|
||||
// Only cache instances that have a reasonable size. Otherwise it might cause memory issues
|
||||
// with the query cache if most memory ends up being spent on queries rather than doc id sets.
|
||||
return ramBytesUsed() <= LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
|
||||
}
|
||||
|
||||
};
|
||||
|
|
|
@ -77,15 +77,6 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
|||
|
||||
};
|
||||
|
||||
public void testFilterRamBytesUsed() {
|
||||
final Query simpleQuery = new TermQuery(new Term("some_field", "some_term"));
|
||||
final long actualRamBytesUsed = RamUsageTester.sizeOf(simpleQuery);
|
||||
final long ramBytesUsed = LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
|
||||
// we cannot assert exactly that the constant is correct since actual
|
||||
// memory usage depends on JVM implementations and settings (eg. UseCompressedOops)
|
||||
assertEquals(actualRamBytesUsed, ramBytesUsed, actualRamBytesUsed / 2);
|
||||
}
|
||||
|
||||
public void testConcurrency() throws Throwable {
|
||||
final LRUQueryCache queryCache = new LRUQueryCache(1 + random().nextInt(20), 1 + random().nextInt(10000), context -> random().nextBoolean());
|
||||
Directory dir = newDirectory();
|
||||
|
@ -282,7 +273,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
|||
return ((DocIdSet) o).ramBytesUsed();
|
||||
}
|
||||
if (o instanceof Query) {
|
||||
return queryCache.ramBytesUsed((Query) o);
|
||||
return LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
|
||||
}
|
||||
if (o instanceof IndexReader || o.getClass().getSimpleName().equals("SegmentCoreReaders")) {
|
||||
// do not take readers or core cache keys into account
|
||||
|
@ -403,7 +394,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
|||
return ((DocIdSet) o).ramBytesUsed();
|
||||
}
|
||||
if (o instanceof Query) {
|
||||
return queryCache.ramBytesUsed((Query) o);
|
||||
return LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
|
||||
}
|
||||
if (o.getClass().getSimpleName().equals("SegmentCoreReaders")) {
|
||||
// do not follow references to core cache keys
|
||||
|
@ -1498,7 +1489,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
|
|||
IndexSearcher searcher = newSearcher(reader);
|
||||
searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
|
||||
|
||||
LRUQueryCache cache = new LRUQueryCache(1, 1000, context -> true);
|
||||
LRUQueryCache cache = new LRUQueryCache(1, 10000, context -> true);
|
||||
searcher.setQueryCache(cache);
|
||||
|
||||
DVCacheQuery query = new DVCacheQuery("field");
|
||||
|
|
Loading…
Reference in New Issue