From 44324d3dfe34fb436595f8c15bfc97eb39564b1f Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Mon, 29 Feb 2016 11:02:57 +0100
Subject: [PATCH] LUCENE-7050: Cache TermsQuery and point queries more aggressively.

---
 lucene/CHANGES.txt                            |  3 +++
 .../UsageTrackingQueryCachingPolicy.java      | 20 ++++++++++++++-
 .../apache/lucene/queries/TermsQueryTest.java | 25 +++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7060170a794..c0925e8ab64 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -124,6 +124,9 @@ Optimizations
 * LUCENE-6793: LegacyNumericRangeQuery.hashCode() is now less subject to hash
   collisions. (J.B. Langston via Adrien Grand)
 
+* LUCENE-7050: TermsQuery and point queries are now cached more aggressively
+  by the default query caching policy. (Adrien Grand)
+
 Changes in Runtime Behavior
 
 * LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.
diff --git a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
index 1f4864519c1..417de58f732 100644
--- a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
@@ -37,13 +37,31 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
   // the hash code that we use as a sentinel in the ring buffer.
   private static final int SENTINEL = Integer.MIN_VALUE;
 
+  /**
+   * Returns true if the class of {@code query}, or one of its super classes
+   * below {@link Query}, has a simple name of the form {@code Point*Query}.
+   */
+  private static boolean isPointQuery(Query query) {
+    // we need to check for super classes because we occasionally use anonymous
+    // sub classes of e.g. PointRangeQuery
+    for (Class<?> clazz = query.getClass(); clazz != Query.class; clazz = clazz.getSuperclass()) {
+      final String simpleName = clazz.getSimpleName();
+      if (simpleName.startsWith("Point") && simpleName.endsWith("Query")) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   static boolean isCostly(Query query) {
     // This does not measure the cost of iterating over the filter (for this we
     // already have the DocIdSetIterator#cost API) but the cost to build the
     // DocIdSet in the first place
     return query instanceof MultiTermQuery ||
         query instanceof MultiTermQueryConstantScoreWrapper ||
-        query instanceof PointRangeQuery;
+        isPointQuery(query) ||
+        // can't refer to TermsQuery directly as it is in another module
+        "TermsQuery".equals(query.getClass().getSimpleName());
   }
 
   static boolean isCheap(Query query) {
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java b/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
index 7b1d4c9713f..c14d5438d27 100644
--- a/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
@@ -34,6 +34,8 @@ import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FilterDirectoryReader;
 import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
@@ -49,6 +51,7 @@ import org.apache.lucene.search.QueryUtils;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
@@ -325,4 +328,26 @@ public class TermsQueryTest extends LuceneTestCase {
     TermsQuery query = new TermsQuery(new Term("field", new BytesRef(new byte[] { (byte) 0xff, (byte) 0xfe })));
     assertEquals("field:[ff fe]", query.toString());
   }
+
+  public void testIsConsideredCostlyByQueryCache() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    IndexWriter w = new IndexWriter(dir, iwc);
+    Document doc = new Document();
+    for (int i = 0; i < 10000; ++i) {
+      w.addDocument(doc);
+    }
+    w.forceMerge(1);
+    DirectoryReader reader = DirectoryReader.open(w);
+    w.close();
+    TermsQuery query = new TermsQuery(new Term("foo", "bar"), new Term("foo", "baz"));
+    UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
+    assertFalse(policy.shouldCache(query, getOnlySegmentReader(reader).getContext()));
+    policy.onUse(query);
+    policy.onUse(query);
+    // cached after two uses
+    assertTrue(policy.shouldCache(query, getOnlySegmentReader(reader).getContext()));
+    reader.close();
+    dir.close();
+  }
 }