LUCENE-7050: Cache TermsQuery and point queries more aggressively.

This commit is contained in:
Adrien Grand 2016-02-29 11:02:57 +01:00
parent 46d05afdae
commit 44324d3dfe
3 changed files with 43 additions and 1 deletions

View File

@ -124,6 +124,9 @@ Optimizations
* LUCENE-6793: LegacyNumericRangeQuery.hashCode() is now less subject to hash
collisions. (J.B. Langston via Adrien Grand)
* LUCENE-7050: TermsQuery and point queries are now cached more aggressively
by the default query caching policy. (Adrien Grand)
Changes in Runtime Behavior
* LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.

View File

@ -37,13 +37,27 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
// the hash code that we use as a sentinel in the ring buffer.
private static final int SENTINEL = Integer.MIN_VALUE;
/**
 * Returns {@code true} when the class of {@code query}, or one of its
 * ancestors below {@link Query}, has a simple name of the form
 * {@code Point...Query}.
 */
private static boolean isPointQuery(Query query) {
  // Anonymous sub classes of eg. PointRangeQuery are occasionally used, so the
  // runtime class alone is not enough: walk up the hierarchy until Query.
  Class<?> current = query.getClass();
  while (current != Query.class) {
    final String name = current.getSimpleName();
    final boolean looksLikePointQuery = name.startsWith("Point") && name.endsWith("Query");
    if (looksLikePointQuery) {
      return true;
    }
    current = current.getSuperclass();
  }
  return false;
}
static boolean isCostly(Query query) {
// This does not measure the cost of iterating over the filter (for this we
// already have the DocIdSetIterator#cost API) but the cost to build the
// DocIdSet in the first place
return query instanceof MultiTermQuery ||
query instanceof MultiTermQueryConstantScoreWrapper ||
query instanceof PointRangeQuery;
isPointQuery(query) ||
// can't refer to TermsQuery directly as it is in another module
"TermsQuery".equals(query.getClass().getSimpleName());
}
static boolean isCheap(Query query) {

View File

@ -34,6 +34,8 @@ import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -49,6 +51,7 @@ import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@ -325,4 +328,26 @@ public class TermsQueryTest extends LuceneTestCase {
TermsQuery query = new TermsQuery(new Term("field", new BytesRef(new byte[] { (byte) 0xff, (byte) 0xfe })));
assertEquals("field:[ff fe]", query.toString());
}
/**
 * Checks that {@link UsageTrackingQueryCachingPolicy} refuses to cache a
 * {@link TermsQuery} that has not been used yet and accepts it once
 * {@code onUse} has been called twice.
 */
public void testIsConsideredCostlyByQueryCache() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig());

  // Index 10000 empty documents and merge them into a single segment.
  // NOTE(review): the document count presumably matters for the policy's
  // decision on this segment -- confirm before changing it.
  Document emptyDoc = new Document();
  int remaining = 10000;
  while (remaining > 0) {
    writer.addDocument(emptyDoc);
    remaining--;
  }
  writer.forceMerge(1);
  DirectoryReader reader = DirectoryReader.open(writer);
  writer.close();

  TermsQuery query = new TermsQuery(new Term("foo", "bar"), new Term("foo", "baz"));
  UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();

  // never used so far: must not be cached
  assertFalse(policy.shouldCache(query, getOnlySegmentReader(reader).getContext()));

  for (int use = 0; use < 2; ++use) {
    policy.onUse(query);
  }
  // cached after two uses
  assertTrue(policy.shouldCache(query, getOnlySegmentReader(reader).getContext()));

  reader.close();
  directory.close();
}
}