Optimize counts on simple queries.

Today we always collect hits in order to compute counts, but some of them can
be computed directly from pre-computed index statistics. This is especially
true when there are no deletions, which should be common for the time-based
data use case.
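
As an illustration of what "pre-computed statistics" buys us, here is a minimal
Lucene-level sketch (a hypothetical helper, not code from this change): counting
by collecting visits every matching document, while the statistics-based count
does no per-document work at all.

```java
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TotalHitCountCollector;

import java.io.IOException;

// Hypothetical helper for illustration only.
final class MatchAllCountSketch {

    // Slow path: run the query and count hits one by one with a collector.
    static int countByCollecting(IndexReader reader) throws IOException {
        TotalHitCountCollector collector = new TotalHitCountCollector();
        new IndexSearcher(reader).search(new MatchAllDocsQuery(), collector);
        return collector.getTotalHits();
    }

    // Fast path: numDocs() is maintained by the index and already excludes
    // deleted documents, so a match_all count needs no collection at all.
    static int countFromStatistics(IndexReader reader) {
        return reader.numDocs();
    }
}
```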

Counts on match_all queries can always be optimized, so requests like

```
GET index/_search?size=0

GET index/_search
{
  "size": 0,
  "query" : {
    "match_all": {}
  }
}
```

should now return almost instantly. Additionally, when there are no deletions,
term queries are optimized as well, so the queries below, which all boil down
to a single term query, should also return almost immediately:

```
GET index/type/_search?size=0

GET index/_search
{
  "size": 0,
  "query" : {
    "match": {
      "foo": "bar"
    }
  }
}

GET index/_search
{
  "size": 0,
  "query" : {
    "constant_score": {
      "filter": {
        "exists": {
          "field": "foo"
        }
      }
    }
  }
}
```
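
For the term-query case, here is a minimal sketch of the underlying check
(again plain Lucene with hypothetical helper names, not the code in this
commit): docFreq is a pre-computed per-segment statistic, but it still counts
deleted documents, so the shortcut is only valid when the reader has no
deletions.

```java
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;

import java.io.IOException;

// Hypothetical helper for illustration only.
final class TermCountSketch {

    static int count(IndexReader reader, Term term) throws IOException {
        if (reader.hasDeletions() == false) {
            // Sum the term's document frequency across segments; this is
            // exact because nothing has been deleted.
            int count = 0;
            for (LeafReaderContext ctx : reader.leaves()) {
                count += ctx.reader().docFreq(term);
            }
            return count;
        }
        // With deletions, fall back to actually executing the query.
        return new IndexSearcher(reader).count(new TermQuery(term));
    }
}
```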

Adrien Grand, 2015-08-21 13:47:26 +02:00
commit 5ff9ca9965 (parent bd44dbe5cd)
3 changed files with 261 additions and 26 deletions

QueryPhase.java (org.elasticsearch.search.query)

@@ -20,6 +20,9 @@
 package org.elasticsearch.search.query;

 import com.google.common.collect.ImmutableMap;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.MinDocQuery;
 import org.apache.lucene.search.*;
 import org.elasticsearch.action.search.SearchType;
@@ -100,23 +103,39 @@ public class QueryPhase implements SearchPhase {
         // here to make sure it happens during the QUERY phase
         aggregationPhase.preProcess(searchContext);

-        searchContext.queryResult().searchTimedOut(false);
+        boolean rescore = execute(searchContext, searchContext.searcher());
+
+        if (rescore) { // only if we do a regular search
+            rescorePhase.execute(searchContext);
+        }
+        suggestPhase.execute(searchContext);
+        aggregationPhase.execute(searchContext);
+    }
+
+    /**
+     * In a package-private method so that it can be tested without having to
+     * wire everything (mapperService, etc.)
+     * @return whether the rescoring phase should be executed
+     */
+    static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
+        QuerySearchResult queryResult = searchContext.queryResult();
+        queryResult.searchTimedOut(false);

         final SearchType searchType = searchContext.searchType();
         boolean rescore = false;
         try {
-            searchContext.queryResult().from(searchContext.from());
-            searchContext.queryResult().size(searchContext.size());
+            queryResult.from(searchContext.from());
+            queryResult.size(searchContext.size());

-            final IndexSearcher searcher = searchContext.searcher();
             Query query = searchContext.query();

             final int totalNumDocs = searcher.getIndexReader().numDocs();
             int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);

             Collector collector;
-            final Callable<TopDocs> topDocsCallable;
+            Callable<TopDocs> topDocsCallable;

+            assert query == searcher.rewrite(query); // already rewritten
             if (searchContext.size() == 0) { // no matter what the value of from is
                 final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
                 collector = totalHitCountCollector;
@@ -240,36 +259,75 @@ public class QueryPhase implements SearchPhase {
                 collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
             }

+            if (collector.getClass() == TotalHitCountCollector.class) {
+                // Optimize counts in simple cases to return in constant time
+                // instead of using a collector
+                while (true) {
+                    // remove wrappers that don't matter for counts
+                    // this is necessary so that we don't only optimize match_all
+                    // queries but also match_all queries that are nested in
+                    // a constant_score query
+                    if (query instanceof ConstantScoreQuery) {
+                        query = ((ConstantScoreQuery) query).getQuery();
+                    } else {
+                        break;
+                    }
+                }
+                if (query.getClass() == MatchAllDocsQuery.class) {
+                    collector = null;
+                    topDocsCallable = new Callable<TopDocs>() {
+                        @Override
+                        public TopDocs call() throws Exception {
+                            int count = searcher.getIndexReader().numDocs();
+                            return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
+                        }
+                    };
+                } else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
+                    final Term term = ((TermQuery) query).getTerm();
+                    collector = null;
+                    topDocsCallable = new Callable<TopDocs>() {
+                        @Override
+                        public TopDocs call() throws Exception {
+                            int count = 0;
+                            for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
+                                count += context.reader().docFreq(term);
+                            }
+                            return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
+                        }
+                    };
+                }
+            }
+
             final boolean timeoutSet = searchContext.timeoutInMillis() != SearchService.NO_TIMEOUT.millis();
-            if (timeoutSet) {
+            if (timeoutSet && collector != null) { // collector might be null if no collection is actually needed
                 // TODO: change to use our own counter that uses the scheduler in ThreadPool
                 // throws TimeLimitingCollector.TimeExceededException when timeout has reached
                 collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeoutInMillis());
             }

             try {
-                searchContext.searcher().search(query, collector);
+                if (collector != null) {
+                    searcher.search(query, collector);
+                }
             } catch (TimeLimitingCollector.TimeExceededException e) {
                 assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
-                searchContext.queryResult().searchTimedOut(true);
+                queryResult.searchTimedOut(true);
             } catch (Lucene.EarlyTerminationException e) {
                 assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
-                searchContext.queryResult().terminatedEarly(true);
+                queryResult.terminatedEarly(true);
             } finally {
                 searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
             }

-            if (terminateAfterSet && searchContext.queryResult().terminatedEarly() == null) {
-                searchContext.queryResult().terminatedEarly(false);
+            if (terminateAfterSet && queryResult.terminatedEarly() == null) {
+                queryResult.terminatedEarly(false);
             }

-            searchContext.queryResult().topDocs(topDocsCallable.call());
+            queryResult.topDocs(topDocsCallable.call());
+            return rescore;
         } catch (Throwable e) {
             throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
         }
-        if (rescore) { // only if we do a regular search
-            rescorePhase.execute(searchContext);
-        }
-        suggestPhase.execute(searchContext);
-        aggregationPhase.execute(searchContext);
     }
 }

QueryPhaseTests.java (new file in org.elasticsearch.search.query; 167 lines added)

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.query;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.TestSearchContext;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

public class QueryPhaseTests extends ESTestCase {

    private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
        TestSearchContext context = new TestSearchContext();
        context.parsedQuery(new ParsedQuery(query));
        context.setSize(0);

        IndexSearcher searcher = new IndexSearcher(reader);
        final AtomicBoolean collected = new AtomicBoolean();
        IndexSearcher contextSearcher = new IndexSearcher(reader) {
            protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
                collected.set(true);
                super.search(leaves, weight, collector);
            }
        };

        final boolean rescore = QueryPhase.execute(context, contextSearcher);
        assertFalse(rescore);
        assertEquals(searcher.count(query), context.queryResult().topDocs().totalHits);
        assertEquals(shouldCollect, collected.get());
    }

    private void countTestCase(boolean withDeletions) throws Exception {
        Directory dir = newDirectory();
        IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
        RandomIndexWriter w = new RandomIndexWriter(getRandom(), dir, iwc);
        final int numDocs = scaledRandomIntBetween(100, 200);
        for (int i = 0; i < numDocs; ++i) {
            Document doc = new Document();
            if (randomBoolean()) {
                doc.add(new StringField("foo", "bar", Store.NO));
            }
            if (randomBoolean()) {
                doc.add(new StringField("foo", "baz", Store.NO));
            }
            if (withDeletions && (rarely() || i == 0)) {
                doc.add(new StringField("delete", "yes", Store.NO));
            }
            w.addDocument(doc);
        }
        if (withDeletions) {
            w.deleteDocuments(new Term("delete", "yes"));
        }
        final IndexReader reader = w.getReader();

        Query matchAll = new MatchAllDocsQuery();
        Query matchAllCsq = new ConstantScoreQuery(matchAll);
        Query tq = new TermQuery(new Term("foo", "bar"));
        Query tCsq = new ConstantScoreQuery(tq);
        BooleanQuery bq = new BooleanQuery();
        bq.add(matchAll, Occur.SHOULD);
        bq.add(tq, Occur.MUST);

        countTestCase(matchAll, reader, false);
        countTestCase(matchAllCsq, reader, false);
        countTestCase(tq, reader, withDeletions);
        countTestCase(tCsq, reader, withDeletions);
        countTestCase(bq, reader, true);

        reader.close();
        w.close();
        dir.close();
    }

    public void testCountWithoutDeletions() throws Exception {
        countTestCase(false);
    }

    public void testCountWithDeletions() throws Exception {
        countTestCase(true);
    }

    public void testPostFilterDisablesCountOptimization() throws Exception {
        TestSearchContext context = new TestSearchContext();
        context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
        context.setSize(0);

        final AtomicBoolean collected = new AtomicBoolean();
        IndexSearcher contextSearcher = new IndexSearcher(new MultiReader()) {
            protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
                collected.set(true);
                super.search(leaves, weight, collector);
            }
        };

        QueryPhase.execute(context, contextSearcher);
        assertEquals(0, context.queryResult().topDocs().totalHits);
        assertFalse(collected.get());

        context.parsedPostFilter(new ParsedQuery(new MatchNoDocsQuery()));
        QueryPhase.execute(context, contextSearcher);
        assertEquals(0, context.queryResult().topDocs().totalHits);
        assertTrue(collected.get());
    }

    public void testMinScoreDisablesCountOptimization() throws Exception {
        TestSearchContext context = new TestSearchContext();
        context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
        context.setSize(0);

        final AtomicBoolean collected = new AtomicBoolean();
        IndexSearcher contextSearcher = new IndexSearcher(new MultiReader()) {
            protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
                collected.set(true);
                super.search(leaves, weight, collector);
            }
        };

        QueryPhase.execute(context, contextSearcher);
        assertEquals(0, context.queryResult().topDocs().totalHits);
        assertFalse(collected.get());

        context.minimumScore(1);
        QueryPhase.execute(context, contextSearcher);
        assertEquals(0, context.queryResult().topDocs().totalHits);
        assertTrue(collected.get());
    }
}

TestSearchContext.java (org.elasticsearch.test)

@@ -83,6 +83,12 @@ public class TestSearchContext extends SearchContext {
     final ThreadPool threadPool;
     final Map<Class<?>, Collector> queryCollectors = new HashMap<>();
     final IndexShard indexShard;
+    final Counter timeEstimateCounter = Counter.newCounter();
+    final QuerySearchResult queryResult = new QuerySearchResult();
+    ParsedQuery originalQuery;
+    ParsedQuery postFilter;
+    Query query;
+    Float minScore;

     ContextIndexSearcher searcher;
     int size;

@@ -363,12 +369,13 @@
     @Override
     public SearchContext minimumScore(float minimumScore) {
-        return null;
+        this.minScore = minimumScore;
+        return this;
     }

     @Override
     public Float minimumScore() {
-        return null;
+        return minScore;
     }

     @Override

@@ -393,12 +400,13 @@
     @Override
     public SearchContext parsedPostFilter(ParsedQuery postFilter) {
-        return null;
+        this.postFilter = postFilter;
+        return this;
     }

     @Override
     public ParsedQuery parsedPostFilter() {
-        return null;
+        return postFilter;
     }

     @Override

@@ -408,17 +416,19 @@
     @Override
     public SearchContext parsedQuery(ParsedQuery query) {
-        return null;
+        this.originalQuery = query;
+        this.query = query.query();
+        return this;
     }

     @Override
     public ParsedQuery parsedQuery() {
-        return null;
+        return originalQuery;
     }

     @Override
     public Query query() {
-        return null;
+        return query;
     }

     @Override

@@ -537,7 +547,7 @@
     @Override
     public QuerySearchResult queryResult() {
-        return null;
+        return queryResult;
     }

     @Override

@@ -580,7 +590,7 @@
     @Override
     public Counter timeEstimateCounter() {
-        throw new UnsupportedOperationException();
+        return timeEstimateCounter;
     }

     @Override