From 5ff9ca9965a5b38d463dbb2a45161f1b2c95a4af Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Fri, 21 Aug 2015 13:47:26 +0200
Subject: [PATCH] Optimize counts on simple queries.

Today we always run a collector in order to compute counts, but some counts
can be computed cheaply from pre-computed index statistics. This is especially
true when there are no deletions, which should be common for the time-based
data use-case.

Counts on match_all queries can always be optimized, so requests like

```
GET index/_search?size=0

GET index/_search
{
  "size": 0,
  "query" : {
    "match_all": {}
  }
}
```

should now return almost instantly. Additionally, when there are no deletions,
term queries are also optimized, so the queries below, which all boil down to
a single term query, would also return almost immediately:

```
GET index/type/_search?size=0

GET index/_search
{
  "size": 0,
  "query" : {
    "match": {
      "foo": "bar"
    }
  }
}

GET index/_search
{
  "size": 0,
  "query" : {
    "constant_score": {
      "filter": {
        "exists": {
          "field": "foo"
        }
      }
    }
  }
}
```
---
 .../search/query/QueryPhase.java              |  92 ++++++++--
 .../search/query/QueryPhaseTests.java         | 167 ++++++++++++++++++
 .../elasticsearch/test/TestSearchContext.java |  28 ++-
 3 files changed, 261 insertions(+), 26 deletions(-)
 create mode 100644 core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java
diff --git a/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java
index 06451af2be9..a7c022aba28 100644
--- a/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java
+++ b/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java
@@ -20,6 +20,9 @@
 package org.elasticsearch.search.query;
 
 import com.google.common.collect.ImmutableMap;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.MinDocQuery;
 import org.apache.lucene.search.*;
 import org.elasticsearch.action.search.SearchType;
@@ -100,23 +103,39 @@ public class QueryPhase implements SearchPhase {
         // here to make sure it happens during the QUERY phase
         aggregationPhase.preProcess(searchContext);
 
-        searchContext.queryResult().searchTimedOut(false);
+        boolean rescore = execute(searchContext, searchContext.searcher());
+
+        if (rescore) { // only if we do a regular search
+            rescorePhase.execute(searchContext);
+        }
+        suggestPhase.execute(searchContext);
+        aggregationPhase.execute(searchContext);
+    }
+
+    /**
+     * In a package-private method so that it can be tested without having to
+     * wire everything (mapperService, etc.)
+     * @return whether the rescoring phase should be executed
+     */
+    static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
+        QuerySearchResult queryResult = searchContext.queryResult();
+        queryResult.searchTimedOut(false);
 
         final SearchType searchType = searchContext.searchType();
         boolean rescore = false;
         try {
-            searchContext.queryResult().from(searchContext.from());
-            searchContext.queryResult().size(searchContext.size());
+            queryResult.from(searchContext.from());
+            queryResult.size(searchContext.size());
 
-            final IndexSearcher searcher = searchContext.searcher();
             Query query = searchContext.query();
 
             final int totalNumDocs = searcher.getIndexReader().numDocs();
             int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
 
             Collector collector;
-            final Callable<TopDocs> topDocsCallable;
+            Callable<TopDocs> topDocsCallable;
 
+            assert query == searcher.rewrite(query); // already rewritten
             if (searchContext.size() == 0) { // no matter what the value of from is
                 final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
                 collector = totalHitCountCollector;
@@ -240,36 +259,75 @@ public class QueryPhase implements SearchPhase {
                 collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
             }
 
+            if (collector.getClass() == TotalHitCountCollector.class) {
+                // Optimize counts in simple cases to return in constant time
+                // instead of using a collector
+                while (true) {
+                    // remove wrappers that don't matter for counts
+                    // this is necessary so that we don't only optimize match_all
+                    // queries but also match_all queries that are nested in
+                    // a constant_score query
+                    if (query instanceof ConstantScoreQuery) {
+                        query = ((ConstantScoreQuery) query).getQuery();
+                    } else {
+                        break;
+                    }
+                }
+
+                if (query.getClass() == MatchAllDocsQuery.class) {
+                    collector = null;
+                    topDocsCallable = new Callable<TopDocs>() {
+                        @Override
+                        public TopDocs call() throws Exception {
+                            int count = searcher.getIndexReader().numDocs();
+                            return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
+                        }
+                    };
+                } else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
+                    final Term term = ((TermQuery) query).getTerm();
+                    collector = null;
+                    topDocsCallable = new Callable<TopDocs>() {
+                        @Override
+                        public TopDocs call() throws Exception {
+                            int count = 0;
+                            for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
+                                count += context.reader().docFreq(term);
+                            }
+                            return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
+                        }
+                    };
+                }
+            }
+
             final boolean timeoutSet = searchContext.timeoutInMillis() != SearchService.NO_TIMEOUT.millis();
-            if (timeoutSet) {
+            if (timeoutSet && collector != null) { // collector might be null if no collection is actually needed
                 // TODO: change to use our own counter that uses the scheduler in ThreadPool
                 // throws TimeLimitingCollector.TimeExceededException when timeout has reached
                 collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeoutInMillis());
             }
 
             try {
-                searchContext.searcher().search(query, collector);
+                if (collector != null) {
+                    searcher.search(query, collector);
+                }
             } catch (TimeLimitingCollector.TimeExceededException e) {
                 assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
-                searchContext.queryResult().searchTimedOut(true);
+                queryResult.searchTimedOut(true);
             } catch (Lucene.EarlyTerminationException e) {
                 assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
-                searchContext.queryResult().terminatedEarly(true);
+                queryResult.terminatedEarly(true);
             } finally {
                 searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
             }
-            if (terminateAfterSet && searchContext.queryResult().terminatedEarly() == null) {
-                searchContext.queryResult().terminatedEarly(false);
+            if (terminateAfterSet && queryResult.terminatedEarly() == null) {
+                queryResult.terminatedEarly(false);
             }
 
-            searchContext.queryResult().topDocs(topDocsCallable.call());
+            queryResult.topDocs(topDocsCallable.call());
+
+            return rescore;
         } catch (Throwable e) {
             throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
         }
-        if (rescore) { // only if we do a regular search
-            rescorePhase.execute(searchContext);
-        }
-        suggestPhase.execute(searchContext);
-        aggregationPhase.execute(searchContext);
     }
 }
diff --git a/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java
new file mode 100644
index 00000000000..99d600752ad
--- /dev/null
+++ b/core/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.query;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.index.query.ParsedQuery;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.TestSearchContext;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+public class QueryPhaseTests extends ESTestCase {
+
+    private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
+        TestSearchContext context = new TestSearchContext();
+        context.parsedQuery(new ParsedQuery(query));
+        context.setSize(0);
+
+        final AtomicBoolean collected = new AtomicBoolean();
+        IndexSearcher contextSearcher = new IndexSearcher(reader) {
+            @Override
+            protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
+                collected.set(true);
+                super.search(leaves, weight, collector);
+            }
+        };
+
+        // execute() returns whether the rescore phase should run; the expected count uses a plain searcher
+        assertFalse(QueryPhase.execute(context, contextSearcher));
+        assertEquals(new IndexSearcher(reader).count(query), context.queryResult().topDocs().totalHits);
+        assertEquals(shouldCollect, collected.get());
+    }
+
+    private void countTestCase(boolean withDeletions) throws Exception {
+        IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+        // try-with-resources closes the reader, writer and directory even if an assertion fails
+        try (Directory dir = newDirectory();
+                RandomIndexWriter w = new RandomIndexWriter(getRandom(), dir, iwc)) {
+            final int numDocs = scaledRandomIntBetween(100, 200);
+            for (int i = 0; i < numDocs; ++i) {
+                Document doc = new Document();
+                if (randomBoolean()) {
+                    doc.add(new StringField("foo", "bar", Store.NO));
+                }
+                if (randomBoolean()) {
+                    doc.add(new StringField("foo", "baz", Store.NO));
+                }
+                // i == 0 guarantees at least one document is flagged when deletions are requested
+                if (withDeletions && (rarely() || i == 0)) {
+                    doc.add(new StringField("delete", "yes", Store.NO));
+                }
+                w.addDocument(doc);
+            }
+            if (withDeletions) {
+                w.deleteDocuments(new Term("delete", "yes"));
+            }
+            Query matchAll = new MatchAllDocsQuery();
+            Query matchAllCsq = new ConstantScoreQuery(matchAll);
+            Query tq = new TermQuery(new Term("foo", "bar"));
+            Query tCsq = new ConstantScoreQuery(tq);
+            BooleanQuery bq = new BooleanQuery();
+            bq.add(matchAll, Occur.SHOULD);
+            bq.add(tq, Occur.MUST);
+            try (IndexReader reader = w.getReader()) {
+                countTestCase(matchAll, reader, false);
+                countTestCase(matchAllCsq, reader, false);
+                countTestCase(tq, reader, withDeletions);
+                countTestCase(tCsq, reader, withDeletions);
+                countTestCase(bq, reader, true);
+            }
+        }
+    }
+
+    public void testCountWithoutDeletions() throws Exception {
+        countTestCase(false);
+    }
+
+    public void testCountWithDeletions() throws Exception {
+        countTestCase(true);
+    }
+
+    public void testPostFilterDisablesCountOptimization() throws Exception {
+        TestSearchContext context = new TestSearchContext();
+        context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
+        context.setSize(0);
+        final AtomicBoolean collected = new AtomicBoolean();
+        IndexSearcher contextSearcher = new IndexSearcher(new MultiReader()) {
+            @Override
+            protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
+                collected.set(true);
+                super.search(leaves, weight, collector);
+            }
+        };
+        // a bare match_all count is expected to be answered without running a collection
+        QueryPhase.execute(context, contextSearcher);
+        assertEquals(0, context.queryResult().topDocs().totalHits);
+        assertFalse(collected.get());
+        // with a post filter the count optimization must not apply, so a collection is expected
+        context.parsedPostFilter(new ParsedQuery(new MatchNoDocsQuery()));
+        QueryPhase.execute(context, contextSearcher);
+        assertEquals(0, context.queryResult().topDocs().totalHits);
+        assertTrue(collected.get());
+    }
+
+    public void testMinScoreDisablesCountOptimization() throws Exception {
+        TestSearchContext context = new TestSearchContext();
+        context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
+        context.setSize(0);
+        final AtomicBoolean collected = new AtomicBoolean();
+        IndexSearcher contextSearcher = new IndexSearcher(new MultiReader()) {
+            @Override
+            protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
+                collected.set(true);
+                super.search(leaves, weight, collector);
+            }
+        };
+        // a bare match_all count is expected to be answered without running a collection
+        QueryPhase.execute(context, contextSearcher);
+        assertEquals(0, context.queryResult().topDocs().totalHits);
+        assertFalse(collected.get());
+        // with a minimum score the count optimization must not apply, so a collection is expected
+        context.minimumScore(1);
+        QueryPhase.execute(context, contextSearcher);
+        assertEquals(0, context.queryResult().topDocs().totalHits);
+        assertTrue(collected.get());
+    }
+
+}
diff --git a/core/src/test/java/org/elasticsearch/test/TestSearchContext.java b/core/src/test/java/org/elasticsearch/test/TestSearchContext.java
index 48725f4c3e4..56766b748b0 100644
--- a/core/src/test/java/org/elasticsearch/test/TestSearchContext.java
+++ b/core/src/test/java/org/elasticsearch/test/TestSearchContext.java
@@ -83,6 +83,12 @@ public class TestSearchContext extends SearchContext {
     final ThreadPool threadPool;
     final Map<Class<?>, Collector> queryCollectors = new HashMap<>();
     final IndexShard indexShard;
+    final Counter timeEstimateCounter = Counter.newCounter();
+    final QuerySearchResult queryResult = new QuerySearchResult();
+    ParsedQuery originalQuery;
+    ParsedQuery postFilter;
+    Query query;
+    Float minScore;
 
     ContextIndexSearcher searcher;
     int size;
@@ -363,12 +369,13 @@ public class TestSearchContext extends SearchContext {
 
     @Override
     public SearchContext minimumScore(float minimumScore) {
-        return null;
+        this.minScore = minimumScore;
+        return this;
     }
 
     @Override
     public Float minimumScore() {
-        return null;
+        return minScore;
     }
 
     @Override
@@ -393,12 +400,13 @@ public class TestSearchContext extends SearchContext {
 
     @Override
     public SearchContext parsedPostFilter(ParsedQuery postFilter) {
-        return null;
+        this.postFilter = postFilter;
+        return this;
     }
 
     @Override
     public ParsedQuery parsedPostFilter() {
-        return null;
+        return postFilter;
     }
 
     @Override
@@ -408,17 +416,19 @@ public class TestSearchContext extends SearchContext {
 
     @Override
     public SearchContext parsedQuery(ParsedQuery query) {
-        return null;
+        this.originalQuery = query;
+        this.query = query.query();
+        return this;
     }
 
     @Override
     public ParsedQuery parsedQuery() {
-        return null;
+        return originalQuery;
     }
 
     @Override
     public Query query() {
-        return null;
+        return query;
     }
 
     @Override
@@ -537,7 +547,7 @@ public class TestSearchContext extends SearchContext {
 
     @Override
     public QuerySearchResult queryResult() {
-        return null;
+        return queryResult;
     }
 
     @Override
@@ -580,7 +590,7 @@ public class TestSearchContext extends SearchContext {
 
     @Override
     public Counter timeEstimateCounter() {
-        throw new UnsupportedOperationException();
+        return timeEstimateCounter;
     }
 
     @Override