LUCENE-7050: Cache TermsQuery and point queries more aggressively.

2016-02-29 11:02:57 +01:00 · 2016-02-29 11:02:57 +01:00 · 44324d3dfe
parent 46d05afdae
commit 44324d3dfe
3 changed files with 43 additions and 1 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -124,6 +124,9 @@ Optimizations
 * LUCENE-6793: LegacyNumericRangeQuery.hashCode() is now less subject to hash
  collisions. (J.B. Langston via Adrien Grand)
 * LUCENE-7050: TermsQuery and point queries are now cached more aggressively
  by the default query caching policy. (Adrien Grand)
 Changes in Runtime Behavior
 * LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.
--- a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
@ -37,13 +37,27 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
  // the hash code that we use as a sentinel in the ring buffer.
  private static final int SENTINEL = Integer.MIN_VALUE;
  /**
   * Tells whether the class of the given query, or one of its super classes
   * below {@link Query}, has a simple name that starts with {@code "Point"}
   * and ends with {@code "Query"}.
   */
  private static boolean isPointQuery(Query query) {
    // Walk up the hierarchy rather than only looking at the runtime class:
    // anonymous sub classes (e.g. of PointRangeQuery) are occasionally used
    // and their own simple name would never match.
    Class<?> type = query.getClass();
    while (type != Query.class) {
      final String name = type.getSimpleName();
      final boolean namedLikePointQuery = name.startsWith("Point") && name.endsWith("Query");
      if (namedLikePointQuery) {
        return true;
      }
      type = type.getSuperclass();
    }
    return false;
  }
  /**
   * Returns {@code true} when the query is a {@code MultiTermQuery}, a
   * {@code MultiTermQueryConstantScoreWrapper}, a point query as detected by
   * {@code isPointQuery}, or when the simple name of its runtime class is
   * exactly {@code "TermsQuery"}.
   *
   * <p>The {@code TermsQuery} check compares the name of the runtime class
   * only, so sub classes with a different simple name are not matched by it.
   */
  static boolean isCostly(Query query) {
    // This does not measure the cost of iterating over the filter (for this we
    // already have the DocIdSetIterator#cost API) but the cost to build the
    // DocIdSet in the first place
    return query instanceof MultiTermQuery ||
        query instanceof MultiTermQueryConstantScoreWrapper ||
-        query instanceof PointRangeQuery;
+        isPointQuery(query) ||
        // can't refer to TermsQuery directly as it is in another module
        "TermsQuery".equals(query.getClass().getSimpleName());
  }
  static boolean isCheap(Query query) {
--- a/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
@ -34,6 +34,8 @@ import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FilterDirectoryReader;
 import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
@ -49,6 +51,7 @@ import org.apache.lucene.search.QueryUtils;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
@ -325,4 +328,26 @@ public class TermsQueryTest extends LuceneTestCase {
    TermsQuery query = new TermsQuery(new Term("field", new BytesRef(new byte[] { (byte) 0xff, (byte) 0xfe })));
    assertEquals("field:[ff fe]", query.toString());
  }
  /**
   * Checks that {@link UsageTrackingQueryCachingPolicy} refuses to cache a
   * {@link TermsQuery} that has not been used yet, and accepts to cache it
   * once it has been used twice.
   */
  public void testIsConsideredCostlyByQueryCache() throws IOException {
    final Directory directory = newDirectory();
    final IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig());

    // index 10000 empty documents and merge them into a single segment
    final Document emptyDoc = new Document();
    for (int remaining = 10000; remaining > 0; --remaining) {
      writer.addDocument(emptyDoc);
    }
    writer.forceMerge(1);
    final DirectoryReader reader = DirectoryReader.open(writer);
    writer.close();

    final TermsQuery query = new TermsQuery(new Term("foo", "bar"), new Term("foo", "baz"));
    final UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();

    // never used so far: must not be cached
    final boolean cachedBeforeAnyUse =
        policy.shouldCache(query, getOnlySegmentReader(reader).getContext());
    assertFalse(cachedBeforeAnyUse);

    for (int use = 0; use < 2; ++use) {
      policy.onUse(query);
    }

    // cached after two uses
    final boolean cachedAfterTwoUses =
        policy.shouldCache(query, getOnlySegmentReader(reader).getContext());
    assertTrue(cachedAfterTwoUses);

    reader.close();
    directory.close();
  }
 }