Shortcut counts on exists queries (#39570) (#39660)

`TopDocsCollectorContext` can already shortcut hit counts on `match_all` and `term` queries when there are no deletions. This change adds this ability for `exists` queries if the index doesn't have deletions and fields are indexed. Closes #37475
2019-03-04 19:53:43 +01:00 · 2019-03-04 19:53:43 +01:00 · 2fe1fa8972
parent af4918ebff
commit 2fe1fa8972
2 changed files with 60 additions and 8 deletions
--- a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java
+++ b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java
@ -19,12 +19,18 @@

 package org.elasticsearch.search.query;

+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MultiCollector;
@ -125,6 +131,7 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
            }
        }

+        @Override
        Collector create(Collector in) {
            assert in == null;
            return collector;
@ -357,6 +364,29 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
                count += context.reader().docFreq(term);
            }
            return count;
+        } else if (query.getClass() == DocValuesFieldExistsQuery.class && reader.hasDeletions() == false) {
+            final String field = ((DocValuesFieldExistsQuery) query).getField();
+            int count = 0;
+            for (LeafReaderContext context : reader.leaves()) {
+                FieldInfos fieldInfos = context.reader().getFieldInfos();
+                FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+                if (fieldInfo != null) {
+                    if (fieldInfo.getPointIndexDimensionCount() > 0) {
+                        PointValues points = context.reader().getPointValues(field);
+                        if (points != null) {
+                            count += points.getDocCount();
+                        }
+                    } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
+                        Terms terms = context.reader().terms(field);
+                        if (terms != null) {
+                            count += terms.getDocCount();
+                        }
+                    } else {
+                        return -1; // no shortcut possible for fields that are not indexed
+                    }
+                }
+            }
+            return count;
        } else {
            return -1;
        }
--- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java
+++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java
@ -21,7 +21,10 @@ package org.elasticsearch.search.query;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.LatLonDocValuesField;
+import org.apache.lucene.document.LatLonPoint;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@ -35,6 +38,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.FilterCollector;
@ -50,6 +54,7 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.action.search.SearchTask;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.query.ParsedQuery;
@ -92,18 +97,20 @@ public class QueryPhaseTests extends IndexShardTestCase {
        closeShards(indexShard);
    }

-    private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
+    private void countTestCase(Query query, IndexReader reader, boolean shouldCollectSearch, boolean shouldCollectCount) throws Exception {
        TestSearchContext context = new TestSearchContext(null, indexShard);
        context.parsedQuery(new ParsedQuery(query));
        context.setSize(0);
        context.setTask(new SearchTask(123L, "", "", "", null, Collections.emptyMap()));

-        final IndexSearcher searcher = shouldCollect ? new IndexSearcher(reader) :
+        final IndexSearcher searcher = shouldCollectSearch ? new IndexSearcher(reader) :
            getAssertingEarlyTerminationSearcher(reader, 0);

        final boolean rescore = QueryPhase.execute(context, searcher, checkCancelled -> {});
        assertFalse(rescore);
-        assertEquals(searcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value);
+        IndexSearcher countSearcher = shouldCollectCount ? new IndexSearcher(reader) :
+            getAssertingEarlyTerminationSearcher(reader, 0);
+        assertEquals(countSearcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value);
    }

    private void countTestCase(boolean withDeletions) throws Exception {
@ -115,9 +122,14 @@ public class QueryPhaseTests extends IndexShardTestCase {
            Document doc = new Document();
            if (randomBoolean()) {
                doc.add(new StringField("foo", "bar", Store.NO));
+                doc.add(new SortedSetDocValuesField("foo", new BytesRef("bar")));
+                doc.add(new SortedSetDocValuesField("docValuesOnlyField", new BytesRef("bar")));
+                doc.add(new LatLonDocValuesField("latLonDVField", 1.0, 1.0));
+                doc.add(new LatLonPoint("latLonDVField", 1.0, 1.0));
            }
            if (randomBoolean()) {
                doc.add(new StringField("foo", "baz", Store.NO));
+                doc.add(new SortedSetDocValuesField("foo", new BytesRef("baz")));
            }
            if (withDeletions && (rarely() || i == 0)) {
                doc.add(new StringField("delete", "yes", Store.NO));
@ -132,16 +144,25 @@ public class QueryPhaseTests extends IndexShardTestCase {
        Query matchAllCsq = new ConstantScoreQuery(matchAll);
        Query tq = new TermQuery(new Term("foo", "bar"));
        Query tCsq = new ConstantScoreQuery(tq);
+        Query dvfeq = new DocValuesFieldExistsQuery("foo");
+        Query dvfeq_points = new DocValuesFieldExistsQuery("latLonDVField");
+        Query dvfeqCsq = new ConstantScoreQuery(dvfeq);
+        // field with doc-values but not indexed will need to collect
+        Query dvOnlyfeq = new DocValuesFieldExistsQuery("docValuesOnlyField");
        BooleanQuery bq = new BooleanQuery.Builder()
            .add(matchAll, Occur.SHOULD)
            .add(tq, Occur.MUST)
            .build();

-        countTestCase(matchAll, reader, false);
-        countTestCase(matchAllCsq, reader, false);
-        countTestCase(tq, reader, withDeletions);
-        countTestCase(tCsq, reader, withDeletions);
-        countTestCase(bq, reader, true);
+        countTestCase(matchAll, reader, false, false);
+        countTestCase(matchAllCsq, reader, false, false);
+        countTestCase(tq, reader, withDeletions, withDeletions);
+        countTestCase(tCsq, reader, withDeletions, withDeletions);
+        countTestCase(dvfeq, reader, withDeletions, true);
+        countTestCase(dvfeq_points, reader, withDeletions, true);
+        countTestCase(dvfeqCsq, reader, withDeletions, true);
+        countTestCase(dvOnlyfeq, reader, true, true);
+        countTestCase(bq, reader, true, true);
        reader.close();
        w.close();
        dir.close();
@ -541,6 +562,7 @@ public class QueryPhaseTests extends IndexShardTestCase {

    private static IndexSearcher getAssertingEarlyTerminationSearcher(IndexReader reader, int size) {
        return new IndexSearcher(reader) {
+            @Override
            protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
                final Collector in = new AssertingEarlyTerminationFilterCollector(collector, size);
                super.search(leaves, weight, in);