LUCENE-7050: Cache TermsQuery and point queries more aggressively.

2016-02-29 11:02:57 +01:00 · 2016-02-29 11:02:57 +01:00 · 44324d3dfe
parent 46d05afdae
commit 44324d3dfe
3 changed files with 43 additions and 1 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -124,6 +124,9 @@ Optimizations
 * LUCENE-6793: LegacyNumericRangeQuery.hashCode() is now less subject to hash
  collisions. (J.B. Langston via Adrien Grand)

+* LUCENE-7050: TermsQuery and point queries are now cached more aggressively
+  by the default query caching policy. (Adrien Grand)
+
 Changes in Runtime Behavior

 * LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.
--- a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
@ -37,13 +37,27 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
  // the hash code that we use as a sentinel in the ring buffer.
  private static final int SENTINEL = Integer.MIN_VALUE;

+  private static boolean isPointQuery(Query query) { // true if the class of query, or a superclass below Query, is named Point*Query
+    // We need to check superclasses too because we occasionally use anonymous
+    // subclasses of e.g. PointRangeQuery, and those have no usable name of their own.
+    for (Class<?> clazz = query.getClass(); clazz != Query.class; clazz = clazz.getSuperclass()) {
+      final String simpleName = clazz.getSimpleName(); // empty string for anonymous classes, so they fall through to the parent
+      if (simpleName.startsWith("Point") && simpleName.endsWith("Query")) {
+        return true;
+      }
+    }
+    return false;
+  }
+
  static boolean isCostly(Query query) {
    // This does not measure the cost of iterating over the filter (for this we
    // already have the DocIdSetIterator#cost API) but the cost to build the
    // DocIdSet in the first place
    return query instanceof MultiTermQuery ||
        query instanceof MultiTermQueryConstantScoreWrapper ||
-        query instanceof PointRangeQuery;
+        isPointQuery(query) ||
+        // TermsQuery is in another module, so we can't refer to the class directly; match it by simple class name instead
+        "TermsQuery".equals(query.getClass().getSimpleName()); // NOTE(review): concrete class only; unlike isPointQuery, differently-named or anonymous subclasses do not match - confirm intended
  }

  static boolean isCheap(Query query) {
--- a/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
@ -34,6 +34,8 @@ import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FilterDirectoryReader;
 import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
@ -49,6 +51,7 @@ import org.apache.lucene.search.QueryUtils;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
@ -325,4 +328,26 @@ public class TermsQueryTest extends LuceneTestCase {
    TermsQuery query = new TermsQuery(new Term("field", new BytesRef(new byte[] { (byte) 0xff, (byte) 0xfe })));
    assertEquals("field:[ff fe]", query.toString());
  }
+
+  public void testIsConsideredCostlyByQueryCache() throws IOException { // the default usage-tracking policy should accept a TermsQuery after two uses
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    IndexWriter w = new IndexWriter(dir, iwc);
+    Document doc = new Document();
+    for (int i = 0; i < 10000; ++i) { // 10000 empty docs; presumably so the segment is large enough to be cache-eligible - TODO confirm
+      w.addDocument(doc);
+    }
+    w.forceMerge(1); // merge down to one segment (getOnlySegmentReader below presumably expects exactly one)
+    DirectoryReader reader = DirectoryReader.open(w);
+    w.close();
+    TermsQuery query = new TermsQuery(new Term("foo", "bar"), new Term("foo", "baz"));
+    UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
+    assertFalse(policy.shouldCache(query, getOnlySegmentReader(reader).getContext())); // no use has been recorded yet
+    policy.onUse(query);
+    policy.onUse(query);
+    // two recorded uses are enough for the policy to accept caching this query
+    assertTrue(policy.shouldCache(query, getOnlySegmentReader(reader).getContext()));
+    reader.close();
+    dir.close();
+  }
 }