LUCENE-8058: Large instances of TermInSetQuery are no longer eligible for caching as they could break memory accounting of the query cache.

2017-11-29 09:29:52 +01:00 · 2017-11-29 09:29:52 +01:00 · 6d34f23263
parent 70767b109f
commit 6d34f23263
6 changed files with 28 additions and 32 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -123,6 +123,10 @@ Optimizations
  (Dawid Weiss, Robert Muir, Mike McCandless)

 * LUCENE-8062: GlobalOrdinalsQuery is no longer eligible for caching. (Jim Ferenczi)
+
+* LUCENE-8058: Large instances of TermInSetQuery are no longer eligible for
+  caching as they could break memory accounting of the query cache.
+  (Adrien Grand)
  
 Tests

--- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
@ -301,6 +301,12 @@ final class BooleanWeight extends Weight {

  @Override
  public boolean isCacheable(LeafReaderContext ctx) {
+    if (weights.size() > TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD) {
+      // Disallow caching large boolean queries to not encourage users
+      // to build large boolean queries as a workaround to the fact that
+      // we disallow caching large TermInSetQueries.
+      return false;
+    }
    for (Weight w : weights) {
      if (w.isCacheable(ctx) == false)
        return false;
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
@ -139,6 +139,12 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
+      if (weights.size() > TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD) {
+        // Disallow caching large dismax queries to not encourage users
+        // to build large dismax queries as a workaround to the fact that
+        // we disallow caching large TermInSetQueries.
+        return false;
+      }
      for (Weight w : weights) {
        if (w.isCacheable(ctx) == false)
          return false;
--- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@ -87,8 +87,9 @@ import org.apache.lucene.util.RoaringDocIdSet;
 */
 public class LRUQueryCache implements QueryCache, Accountable {

-  // memory usage of a simple term query
-  static final long QUERY_DEFAULT_RAM_BYTES_USED = 192;
+  // approximate memory usage that we assign to all queries
+  // this maps roughly to a BooleanQuery with a couple term clauses
+  static final long QUERY_DEFAULT_RAM_BYTES_USED = 1024;

  static final long HASHTABLE_RAM_BYTES_PER_ENTRY =
      2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // key + value
@ -298,7 +299,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
    try {
      Query singleton = uniqueQueries.putIfAbsent(query, query);
      if (singleton == null) {
-        onQueryCache(query, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(query));
+        onQueryCache(query, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + QUERY_DEFAULT_RAM_BYTES_USED);
      } else {
        query = singleton;
      }
@ -381,7 +382,7 @@ public class LRUQueryCache implements QueryCache, Accountable {

  private void onEviction(Query singleton) {
    assert lock.isHeldByCurrentThread();
-    onQueryEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(singleton));
+    onQueryEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + QUERY_DEFAULT_RAM_BYTES_USED);
    for (LeafCache leafCache : cache.values()) {
      leafCache.remove(singleton);
    }
@ -421,9 +422,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
      long recomputedRamBytesUsed =
            HASHTABLE_RAM_BYTES_PER_ENTRY * cache.size()
          + LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY * uniqueQueries.size();
-      for (Query query : mostRecentlyUsedQueries) {
-        recomputedRamBytesUsed += ramBytesUsed(query);
-      }
+      recomputedRamBytesUsed += mostRecentlyUsedQueries.size() * QUERY_DEFAULT_RAM_BYTES_USED;
      for (LeafCache leafCache : cache.values()) {
        recomputedRamBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY * leafCache.cache.size();
        for (DocIdSet set : leafCache.cache.values()) {
@ -481,18 +480,6 @@ public class LRUQueryCache implements QueryCache, Accountable {
    }
  }

-  /**
-   * Return the number of bytes used by the given query. The default
-   * implementation returns {@link Accountable#ramBytesUsed()} if the query
-   * implements {@link Accountable} and <code>1024</code> otherwise.
-   */
-  protected long ramBytesUsed(Query query) {
-    if (query instanceof Accountable) {
-      return ((Accountable) query).ramBytesUsed();
-    }
-    return QUERY_DEFAULT_RAM_BYTES_USED;
-  }
-
  /**
   * Default cache implementation: uses {@link RoaringDocIdSet} for sets that
   * have a density &lt; 1% and a {@link BitDocIdSet} over a {@link FixedBitSet}
--- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
@ -318,7 +318,9 @@ public class TermInSetQuery extends Query implements Accountable {

      @Override
      public boolean isCacheable(LeafReaderContext ctx) {
-        return true;
+        // Only cache instances that have a reasonable size. Otherwise it might cause memory issues
+        // with the query cache if most memory ends up being spent on queries rather than doc id sets.
+        return ramBytesUsed() <= LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
      }

    };
--- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
@ -77,15 +77,6 @@ public class TestLRUQueryCache extends LuceneTestCase {

  };

-  public void testFilterRamBytesUsed() {
-    final Query simpleQuery = new TermQuery(new Term("some_field", "some_term"));
-    final long actualRamBytesUsed = RamUsageTester.sizeOf(simpleQuery);
-    final long ramBytesUsed = LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
-    // we cannot assert exactly that the constant is correct since actual
-    // memory usage depends on JVM implementations and settings (eg. UseCompressedOops)
-    assertEquals(actualRamBytesUsed, ramBytesUsed, actualRamBytesUsed / 2);
-  }
-
  public void testConcurrency() throws Throwable {
    final LRUQueryCache queryCache = new LRUQueryCache(1 + random().nextInt(20), 1 + random().nextInt(10000), context -> random().nextBoolean());
    Directory dir = newDirectory();
@ -282,7 +273,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
          return ((DocIdSet) o).ramBytesUsed();
        }
        if (o instanceof Query) {
-          return queryCache.ramBytesUsed((Query) o);
+          return LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
        }
        if (o instanceof IndexReader || o.getClass().getSimpleName().equals("SegmentCoreReaders")) {
          // do not take readers or core cache keys into account
@ -403,7 +394,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
          return ((DocIdSet) o).ramBytesUsed();
        }
        if (o instanceof Query) {
-          return queryCache.ramBytesUsed((Query) o);
+          return LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
        }
        if (o.getClass().getSimpleName().equals("SegmentCoreReaders")) {
          // do not follow references to core cache keys
@ -1498,7 +1489,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
    IndexSearcher searcher = newSearcher(reader);
    searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);

-    LRUQueryCache cache = new LRUQueryCache(1, 1000, context -> true);
+    LRUQueryCache cache = new LRUQueryCache(1, 10000, context -> true);
    searcher.setQueryCache(cache);

    DVCacheQuery query = new DVCacheQuery("field");