LUCENE-10085: Implement Weight#count on DocValuesFieldExistsQuery (#445)

Co-authored-by: Adrien Grand <jpountz@gmail.com>
2021-11-19 21:06:58 +04:00 · 2021-11-19 21:06:58 +04:00 · 1a869c185b
parent af831d2810
commit 1a869c185b
3 changed files with 77 additions and 0 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -45,6 +45,10 @@ New Features
 * LUCENE-10220: Add a utility method to get IntervalSource from analyzed text (or token stream).
  (Uwe Schindler, Dawid Weiss, Alan Woodward)

+* LUCENE-10085: Added Weight#count on DocValuesFieldExistsQuery to speed up counting matches when
+  the field also indexes terms or points and the segment has no deletions.
+  (Quentin Pradet, Adrien Grand)
+
 Improvements
 ---------------------

--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java
@ -19,7 +19,9 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Objects;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.Terms;

@ -74,6 +76,22 @@ public final class DocValuesFieldExistsQuery extends Query {
        return new ConstantScoreScorer(this, score(), scoreMode, iterator);
      }

+      /**
+       * Returns the number of documents in the given segment that have a doc value for the
+       * field, using index statistics instead of iterating over documents when possible.
+       *
+       * <ul>
+       *   <li>If the segment has no {@link FieldInfo} for the field, or the field has no doc
+       *       values there, the count is 0.
+       *   <li>If the segment has no deletions and the field also indexes points (or, failing
+       *       that, terms), the doc count of that structure is returned. NOTE(review): this is
+       *       only accurate if every document that has a doc value for the field also has a
+       *       point/term for it, and vice versa - confirm this is an accepted precondition.
+       *   <li>Otherwise the decision is delegated to {@code super.count(context)}.
+       * </ul>
+       *
+       * @param context the segment to count matches in
+       * @return the shortcut count as described above, or whatever {@code super.count(context)}
+       *     returns when no shortcut applies
+       * @throws IOException if reading index statistics fails
+       */
+      @Override
+      public int count(LeafReaderContext context) throws IOException {
+        final LeafReader reader = context.reader();
+        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
+        if (fieldInfo == null || fieldInfo.getDocValuesType() == DocValuesType.NONE) {
+          return 0; // the field doesn't index doc values
+        } else if (!reader.hasDeletions()) {
+          // Deletions are not reflected in getDocCount(), so the shortcut is only attempted on
+          // segments without deleted documents.
+          if (fieldInfo.getPointDimensionCount() > 0) {
+            // The reader may hand back null even though the FieldInfo declares points; treat
+            // that as "no document has a value" rather than dereferencing null.
+            final PointValues pointValues = reader.getPointValues(field);
+            return pointValues == null ? 0 : pointValues.getDocCount();
+          } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
+            // Same null guard for the terms dictionary.
+            final Terms terms = reader.terms(field);
+            return terms == null ? 0 : terms.getDocCount();
+          }
+        }
+        return super.count(context);
+      }
+
      @Override
      public boolean isCacheable(LeafReaderContext ctx) {
        return DocValues.isCacheable(ctx, field);
--- a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java
@ -19,15 +19,22 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;

 public class TestDocValuesFieldExistsQuery extends LuceneTestCase {

@ -206,6 +213,54 @@ public class TestDocValuesFieldExistsQuery extends LuceneTestCase {
    dir.close();
  }

+  /**
+   * Checks that {@link Weight#count} on a {@link DocValuesFieldExistsQuery} agrees with {@link
+   * IndexSearcher#count} for fields that index points or terms next to their doc values, and
+   * that the shortcut is not taken (count is -1) once the segment has deleted documents.
+   */
+  public void testQueryMatchesCount() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+    int randomNumDocs = TestUtil.nextInt(random(), 10, 100);
+    int numMatchingDocs = 0;
+
+    for (int i = 0; i < randomNumDocs; i++) {
+      Document doc = new Document();
+      // Always populate document 0 so that at least one document has a "long" value in
+      // [0, 10]; otherwise the delete-by-range below could delete nothing and the final
+      // assertion would fail spuriously.
+      if (i == 0 || random().nextBoolean()) {
+        doc.add(new LongPoint("long", i));
+        doc.add(new NumericDocValuesField("long", i));
+        doc.add(new StringField("string", "value", Store.NO));
+        doc.add(new SortedDocValuesField("string", new BytesRef("value")));
+        numMatchingDocs++;
+      }
+      w.addDocument(doc);
+    }
+    // Merge down to a single segment so that leaves().get(0) covers every document.
+    w.forceMerge(1);
+
+    DirectoryReader reader = w.getReader();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+
+    assertSameCount(reader, searcher, "long", numMatchingDocs);
+    assertSameCount(reader, searcher, "string", numMatchingDocs);
+    assertSameCount(reader, searcher, "doesNotExist", 0);
+
+    // Test that we can't count in O(1) when there are deleted documents
+    // Merging is disabled so that the deletions stay visible on the segment.
+    w.w.getConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+    w.deleteDocuments(LongPoint.newRangeQuery("long", 0L, 10L));
+    DirectoryReader reader2 = w.getReader();
+    final IndexSearcher searcher2 = new IndexSearcher(reader2);
+    final Query testQuery = new DocValuesFieldExistsQuery("long");
+    final Weight weight2 = searcher2.createWeight(testQuery, ScoreMode.COMPLETE, 1);
+    assertEquals(-1, weight2.count(reader2.leaves().get(0)));
+
+    IOUtils.close(reader, reader2, w, dir);
+  }
+
+  /**
+   * Asserts that a {@link DocValuesFieldExistsQuery} on {@code field} reports {@code
+   * numMatchingDocs} both through {@link IndexSearcher#count} and through {@link Weight#count}
+   * on the reader's first leaf.
+   *
+   * <p>Only the first leaf is inspected, so the caller is expected to pass a single-segment
+   * reader for the per-leaf count to equal the total.
+   *
+   * @param reader reader whose first leaf is handed to {@link Weight#count}
+   * @param searcher searcher used to run the query and create the weight
+   * @param field field the exists-query is built on
+   * @param numMatchingDocs expected number of matching documents
+   */
+  private void assertSameCount(
+      IndexReader reader, IndexSearcher searcher, String field, int numMatchingDocs)
+      throws IOException {
+    final Query testQuery = new DocValuesFieldExistsQuery(field);
+    // Expected value goes first so that failure messages label the two numbers correctly.
+    assertEquals(numMatchingDocs, searcher.count(testQuery));
+    final Weight weight = searcher.createWeight(testQuery, ScoreMode.COMPLETE, 1);
+    assertEquals(numMatchingDocs, weight.count(reader.leaves().get(0)));
+  }
+
  private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
      throws IOException {
    final int maxDoc = searcher.getIndexReader().maxDoc();