From 1a869c185b69dbef10d3861c74beb11bf1ffd3de Mon Sep 17 00:00:00 2001
From: Quentin Pradet
Date: Fri, 19 Nov 2021 21:06:58 +0400
Subject: [PATCH] LUCENE-10085: Implement Weight#count on DocValuesFieldExistsQuery (#445)

Co-authored-by: Adrien Grand
---
 lucene/CHANGES.txt                            |  4 ++
 .../search/DocValuesFieldExistsQuery.java     | 36 ++++++++++
 .../search/TestDocValuesFieldExistsQuery.java | 65 +++++++++++++++++++
 3 files changed, 105 insertions(+)

Review note: hunk sizes and the diffstat above reflect the revised code; the blob ids on the
"index" lines are still those of the original commit.

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 341d80b5bce..88a8d9a4d95 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -45,6 +45,10 @@ New Features
 * LUCENE-10220: Add an utility method to get IntervalSource from analyzed text (or token stream).
   (Uwe Schindler, Dawid Weiss, Alan Woodward)
 
+* LUCENE-10085: Added Weight#count on DocValuesFieldExistsQuery to speed up the query if terms or
+  points are indexed.
+  (Quentin Pradet, Adrien Grand)
+
 Improvements
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java
index 851c5bfbbd0..acdd899d58a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java
@@ -19,7 +19,11 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Objects;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.Terms;
 
@@ -74,6 +78,38 @@ public final class DocValuesFieldExistsQuery extends Query {
         return new ConstantScoreScorer(this, score(), scoreMode, iterator);
       }
 
+      /**
+       * Counts the documents of this segment that have a value for the field, relying on index
+       * statistics rather than iteration when they are available.
+       *
+       * <p>Returns 0 if the segment does not know the field or the field has no doc values. If
+       * the segment has no deletions and the field also indexes points or terms, returns the doc
+       * count of that structure. Otherwise returns {@code super.count(context)}.
+       */
+      @Override
+      public int count(LeafReaderContext context) throws IOException {
+        final LeafReader reader = context.reader();
+        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
+        if (fieldInfo == null || fieldInfo.getDocValuesType() == DocValuesType.NONE) {
+          return 0; // the field doesn't index doc values
+        } else if (!reader.hasDeletions()) {
+          // The point/term doc count stands in for the doc-values doc count. The reader may
+          // return null for either structure, so fall through rather than dereference it.
+          if (fieldInfo.getPointDimensionCount() > 0) {
+            final PointValues pointValues = reader.getPointValues(field);
+            if (pointValues != null) {
+              return pointValues.getDocCount();
+            }
+          } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
+            final Terms terms = reader.terms(field);
+            if (terms != null) {
+              return terms.getDocCount();
+            }
+          }
+        }
+        return super.count(context);
+      }
+
       @Override
       public boolean isCacheable(LeafReaderContext ctx) {
         return DocValues.isCacheable(ctx, field);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java
index 3bdc4a37cc6..489ee0f35e9 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java
@@ -19,15 +19,22 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
 
 public class TestDocValuesFieldExistsQuery extends LuceneTestCase {
 
@@ -206,6 +213,64 @@ public class TestDocValuesFieldExistsQuery extends LuceneTestCase {
     dir.close();
   }
 
+  /**
+   * Checks that {@code Weight#count} agrees with {@code IndexSearcher#count} on a single merged
+   * segment, and that it returns -1 once the segment has a deleted document.
+   */
+  public void testQueryMatchesCount() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+    int randomNumDocs = TestUtil.nextInt(random(), 10, 100);
+    int numMatchingDocs = 0;
+
+    for (int i = 0; i < randomNumDocs; i++) {
+      Document doc = new Document();
+      // Always index doc 0 so the delete-by-query below matches exactly one document
+      if (i == 0 || random().nextBoolean()) {
+        doc.add(new LongPoint("long", i));
+        doc.add(new NumericDocValuesField("long", i));
+        doc.add(new StringField("string", "value", Store.NO));
+        doc.add(new SortedDocValuesField("string", new BytesRef("value")));
+        numMatchingDocs++;
+      }
+      w.addDocument(doc);
+    }
+    w.forceMerge(1);
+
+    DirectoryReader reader = w.getReader();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+
+    assertSameCount(reader, searcher, "long", numMatchingDocs);
+    assertSameCount(reader, searcher, "string", numMatchingDocs);
+    assertSameCount(reader, searcher, "doesNotExist", 0);
+
+    // Test that we can't count in O(1) when there are deleted documents
+    w.w.getConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+    // Delete only doc 0 so that the other documents stay live and reader2 keeps a leaf
+    w.deleteDocuments(LongPoint.newExactQuery("long", 0L));
+    DirectoryReader reader2 = w.getReader();
+    final IndexSearcher searcher2 = new IndexSearcher(reader2);
+    final Query testQuery = new DocValuesFieldExistsQuery("long");
+    final Weight weight2 = searcher2.createWeight(testQuery, ScoreMode.COMPLETE, 1);
+    assertEquals(-1, weight2.count(reader2.leaves().get(0)));
+
+    IOUtils.close(reader, reader2, w, dir);
+  }
+
+  /**
+   * Asserts that {@code IndexSearcher#count} and {@code Weight#count} on the first leaf of
+   * {@code reader} both report {@code numMatchingDocs} documents for {@code field}.
+   */
+  private void assertSameCount(
+      IndexReader reader, IndexSearcher searcher, String field, int numMatchingDocs)
+      throws IOException {
+    final Query testQuery = new DocValuesFieldExistsQuery(field);
+    assertEquals(numMatchingDocs, searcher.count(testQuery));
+    final Weight weight = searcher.createWeight(testQuery, ScoreMode.COMPLETE, 1);
+    assertEquals(numMatchingDocs, weight.count(reader.leaves().get(0)));
+  }
+
   private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
       throws IOException {
     final int maxDoc = searcher.getIndexReader().maxDoc();