From 5ff9ca9965a5b38d463dbb2a45161f1b2c95a4af Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 21 Aug 2015 13:47:26 +0200 Subject: [PATCH] Optimize counts on simple queries. Today we always collect in order to compute counts, but some of them can be easily optimized by using pre-computed index statistics. This is especially true in the case that there are no deletions, which should be common for the time-based data use-case. Counts on match_all queries can always be optimized, so requests like ``` GET index/_search?size=0 GET index/_search { "size": 0, "query" : { "match_all": {} } } ``` should now return almost instantly. Additionally, when there are no deletions, term queries are also optimized, so the below queries which all boil down to a single term query would also return almost immediately: ``` GET index/type/_search?size=0 GET index/_search { "size": 0, "query" : { "match": { "foo": "bar" } } } GET index/_search { "size": 0, "query" : { "constant_score": { "filter": { "exists": { "field": "foo" } } } } } ``` --- .../search/query/QueryPhase.java | 92 ++++++++-- .../search/query/QueryPhaseTests.java | 167 ++++++++++++++++++ .../elasticsearch/test/TestSearchContext.java | 28 ++- 3 files changed, 261 insertions(+), 26 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java diff --git a/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java index 06451af2be9..a7c022aba28 100644 --- a/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -20,6 +20,9 @@ package org.elasticsearch.search.query; import com.google.common.collect.ImmutableMap; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.queries.MinDocQuery; import org.apache.lucene.search.*; import 
org.elasticsearch.action.search.SearchType; @@ -100,23 +103,39 @@ public class QueryPhase implements SearchPhase { // here to make sure it happens during the QUERY phase aggregationPhase.preProcess(searchContext); - searchContext.queryResult().searchTimedOut(false); + boolean rescore = execute(searchContext, searchContext.searcher()); + + if (rescore) { // only if we do a regular search + rescorePhase.execute(searchContext); + } + suggestPhase.execute(searchContext); + aggregationPhase.execute(searchContext); + } + + /** + * In a package-private method so that it can be tested without having to + * wire everything (mapperService, etc.) + * @return whether the rescoring phase should be executed + */ + static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException { + QuerySearchResult queryResult = searchContext.queryResult(); + queryResult.searchTimedOut(false); final SearchType searchType = searchContext.searchType(); boolean rescore = false; try { - searchContext.queryResult().from(searchContext.from()); - searchContext.queryResult().size(searchContext.size()); + queryResult.from(searchContext.from()); + queryResult.size(searchContext.size()); - final IndexSearcher searcher = searchContext.searcher(); Query query = searchContext.query(); final int totalNumDocs = searcher.getIndexReader().numDocs(); int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs); Collector collector; - final Callable<TopDocs> topDocsCallable; + Callable<TopDocs> topDocsCallable; + assert query == searcher.rewrite(query); // already rewritten if (searchContext.size() == 0) { // no matter what the value of from is final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector(); collector = totalHitCountCollector; @@ -240,36 +259,75 @@ public class QueryPhase implements SearchPhase { collector = new MinimumScoreCollector(collector, searchContext.minimumScore()); } + if (collector.getClass() == 
TotalHitCountCollector.class) { + // Optimize counts in simple cases to return in constant time + // instead of using a collector + while (true) { + // remove wrappers that don't matter for counts + // this is necessary so that we don't only optimize match_all + // queries but also match_all queries that are nested in + // a constant_score query + if (query instanceof ConstantScoreQuery) { + query = ((ConstantScoreQuery) query).getQuery(); + } else { + break; + } + } + + if (query.getClass() == MatchAllDocsQuery.class) { + collector = null; + topDocsCallable = new Callable<TopDocs>() { + @Override + public TopDocs call() throws Exception { + int count = searcher.getIndexReader().numDocs(); + return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0); + } + }; + } else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) { + final Term term = ((TermQuery) query).getTerm(); + collector = null; + topDocsCallable = new Callable<TopDocs>() { + @Override + public TopDocs call() throws Exception { + int count = 0; + for (LeafReaderContext context : searcher.getIndexReader().leaves()) { + count += context.reader().docFreq(term); + } + return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0); + } + }; + } + } + final boolean timeoutSet = searchContext.timeoutInMillis() != SearchService.NO_TIMEOUT.millis(); - if (timeoutSet) { + if (timeoutSet && collector != null) { // collector might be null if no collection is actually needed // TODO: change to use our own counter that uses the scheduler in ThreadPool // throws TimeLimitingCollector.TimeExceededException when timeout has reached collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeoutInMillis()); } try { - searchContext.searcher().search(query, collector); + if (collector != null) { + searcher.search(query, collector); + } } catch (TimeLimitingCollector.TimeExceededException e) { assert timeoutSet : "TimeExceededException thrown even though timeout 
wasn't set"; - searchContext.queryResult().searchTimedOut(true); + queryResult.searchTimedOut(true); } catch (Lucene.EarlyTerminationException e) { assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set"; - searchContext.queryResult().terminatedEarly(true); + queryResult.terminatedEarly(true); } finally { searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION); } - if (terminateAfterSet && searchContext.queryResult().terminatedEarly() == null) { - searchContext.queryResult().terminatedEarly(false); + if (terminateAfterSet && queryResult.terminatedEarly() == null) { + queryResult.terminatedEarly(false); } - searchContext.queryResult().topDocs(topDocsCallable.call()); + queryResult.topDocs(topDocsCallable.call()); + + return rescore; } catch (Throwable e) { throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e); } - if (rescore) { // only if we do a regular search - rescorePhase.execute(searchContext); - } - suggestPhase.execute(searchContext); - aggregationPhase.execute(searchContext); } } diff --git a/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java new file mode 100644 index 00000000000..99d600752ad --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java @@ -0,0 +1,167 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.query; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.Directory; +import org.elasticsearch.index.query.ParsedQuery; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.TestSearchContext; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; + +public class QueryPhaseTests extends ESTestCase { + + private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception { + TestSearchContext context = new TestSearchContext(); + context.parsedQuery(new ParsedQuery(query)); + context.setSize(0); + 
+ IndexSearcher searcher = new IndexSearcher(reader); + final AtomicBoolean collected = new AtomicBoolean(); + IndexSearcher contextSearcher = new IndexSearcher(reader) { + protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException { + collected.set(true); + super.search(leaves, weight, collector); + } + }; + + final boolean rescore = QueryPhase.execute(context, contextSearcher); + assertFalse(rescore); + assertEquals(searcher.count(query), context.queryResult().topDocs().totalHits); + assertEquals(shouldCollect, collected.get()); + } + + private void countTestCase(boolean withDeletions) throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE); + RandomIndexWriter w = new RandomIndexWriter(getRandom(), dir, iwc); + final int numDocs = scaledRandomIntBetween(100, 200); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + if (randomBoolean()) { + doc.add(new StringField("foo", "bar", Store.NO)); + } + if (randomBoolean()) { + doc.add(new StringField("foo", "baz", Store.NO)); + } + if (withDeletions && (rarely() || i == 0)) { + doc.add(new StringField("delete", "yes", Store.NO)); + } + w.addDocument(doc); + } + if (withDeletions) { + w.deleteDocuments(new Term("delete", "yes")); + } + final IndexReader reader = w.getReader(); + Query matchAll = new MatchAllDocsQuery(); + Query matchAllCsq = new ConstantScoreQuery(matchAll); + Query tq = new TermQuery(new Term("foo", "bar")); + Query tCsq = new ConstantScoreQuery(tq); + BooleanQuery bq = new BooleanQuery(); + bq.add(matchAll, Occur.SHOULD); + bq.add(tq, Occur.MUST); + + countTestCase(matchAll, reader, false); + countTestCase(matchAllCsq, reader, false); + countTestCase(tq, reader, withDeletions); + countTestCase(tCsq, reader, withDeletions); + countTestCase(bq, reader, true); + reader.close(); + w.close(); + dir.close(); + } + + public void testCountWithoutDeletions() throws Exception { 
+ countTestCase(false); + } + + public void testCountWithDeletions() throws Exception { + countTestCase(true); + } + + public void testPostFilterDisablesCountOptimization() throws Exception { + TestSearchContext context = new TestSearchContext(); + context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery())); + context.setSize(0); + + final AtomicBoolean collected = new AtomicBoolean(); + IndexSearcher contextSearcher = new IndexSearcher(new MultiReader()) { + protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException { + collected.set(true); + super.search(leaves, weight, collector); + } + }; + + QueryPhase.execute(context, contextSearcher); + assertEquals(0, context.queryResult().topDocs().totalHits); + assertFalse(collected.get()); + + context.parsedPostFilter(new ParsedQuery(new MatchNoDocsQuery())); + QueryPhase.execute(context, contextSearcher); + assertEquals(0, context.queryResult().topDocs().totalHits); + assertTrue(collected.get()); + } + + public void testMinScoreDisablesCountOptimization() throws Exception { + TestSearchContext context = new TestSearchContext(); + context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery())); + context.setSize(0); + + final AtomicBoolean collected = new AtomicBoolean(); + IndexSearcher contextSearcher = new IndexSearcher(new MultiReader()) { + protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException { + collected.set(true); + super.search(leaves, weight, collector); + } + }; + + QueryPhase.execute(context, contextSearcher); + assertEquals(0, context.queryResult().topDocs().totalHits); + assertFalse(collected.get()); + + context.minimumScore(1); + QueryPhase.execute(context, contextSearcher); + assertEquals(0, context.queryResult().topDocs().totalHits); + assertTrue(collected.get()); + } + +} diff --git a/core/src/test/java/org/elasticsearch/test/TestSearchContext.java b/core/src/test/java/org/elasticsearch/test/TestSearchContext.java index 
48725f4c3e4..56766b748b0 100644 --- a/core/src/test/java/org/elasticsearch/test/TestSearchContext.java +++ b/core/src/test/java/org/elasticsearch/test/TestSearchContext.java @@ -83,6 +83,12 @@ public class TestSearchContext extends SearchContext { final ThreadPool threadPool; final Map<Class<?>, Collector> queryCollectors = new HashMap<>(); final IndexShard indexShard; + final Counter timeEstimateCounter = Counter.newCounter(); + final QuerySearchResult queryResult = new QuerySearchResult(); + ParsedQuery originalQuery; + ParsedQuery postFilter; + Query query; + Float minScore; ContextIndexSearcher searcher; int size; @@ -363,12 +369,13 @@ public class TestSearchContext extends SearchContext { @Override public SearchContext minimumScore(float minimumScore) { - return null; + this.minScore = minimumScore; + return this; } @Override public Float minimumScore() { - return null; + return minScore; } @Override @@ -393,12 +400,13 @@ public class TestSearchContext extends SearchContext { @Override public SearchContext parsedPostFilter(ParsedQuery postFilter) { - return null; + this.postFilter = postFilter; + return this; } @Override public ParsedQuery parsedPostFilter() { - return null; + return postFilter; } @Override @@ -408,17 +416,19 @@ public class TestSearchContext extends SearchContext { @Override public SearchContext parsedQuery(ParsedQuery query) { - return null; + this.originalQuery = query; + this.query = query.query(); + return this; } @Override public ParsedQuery parsedQuery() { - return null; + return originalQuery; } @Override public Query query() { - return null; + return query; } @Override @@ -537,7 +547,7 @@ public class TestSearchContext extends SearchContext { @Override public QuerySearchResult queryResult() { - return null; + return queryResult; } @Override @@ -580,7 +590,7 @@ public class TestSearchContext extends SearchContext { @Override public Counter timeEstimateCounter() { - throw new UnsupportedOperationException(); + return timeEstimateCounter; } 
@Override