Check ahead if we can get the count (#13899)

Currently, we first traverse the BKD tree or perform a binary search using DocValues, and only then check whether the count can be obtained in the count() method of IndexSortSortedNumericDocValuesRangeQuery. We should consider providing a mechanism to perform this check beforehand, to avoid unnecessary processing when dealing with a sparse range.
2024-10-25 15:05:51 +08:00 · 2024-10-25 15:05:51 +08:00 · 0bbef32cf5
parent 2ec5cc8c17
commit 0bbef32cf5
2 changed files with 37 additions and 1 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -63,6 +63,8 @@ Optimizations
 * GITHUB#13941: Optimized computation of top-hits on disjunctive queries with
  many clauses. (Adrien Grand)

+* GITHUB#13899: Check ahead if we can get the count. (Lu Xugang)
+
 Bug Fixes
 ---------------------
 * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java
@ -186,10 +186,44 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
      @Override
      public int count(LeafReaderContext context) throws IOException {
        if (context.reader().hasDeletions() == false) {
-          IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
+          if (lowerValue > upperValue) {
+            return 0;
+          }
+          IteratorAndCount itAndCount = null;
+          LeafReader reader = context.reader();
+
+          // first use bkd optimization if possible
+          SortedNumericDocValues sortedNumericValues = DocValues.getSortedNumeric(reader, field);
+          NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);
+          PointValues pointValues = reader.getPointValues(field);
+          if (pointValues != null && pointValues.getDocCount() == reader.maxDoc()) {
+            itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues);
+          }
          if (itAndCount != null && itAndCount.count != -1) {
            return itAndCount.count;
          }
+
+          // use index sort optimization if possible
+          Sort indexSort = reader.getMetaData().sort();
+          if (indexSort != null
+              && indexSort.getSort().length > 0
+              && indexSort.getSort()[0].getField().equals(field)) {
+            final SortField sortField = indexSort.getSort()[0];
+            final SortField.Type sortFieldType = getSortFieldType(sortField);
+            // The index sort optimization is only supported for Type.INT and Type.LONG
+            if (sortFieldType == Type.INT || sortFieldType == Type.LONG) {
+              Object missingValue = sortField.getMissingValue();
+              final long missingLongValue = missingValue == null ? 0L : (long) missingValue;
+              // all documents have docValues or missing value falls outside the range
+              if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc())
+                  || (missingLongValue < lowerValue || missingLongValue > upperValue)) {
+                itAndCount = getDocIdSetIterator(sortField, sortFieldType, context, numericValues);
+              }
+              if (itAndCount != null && itAndCount.count != -1) {
+                return itAndCount.count;
+              }
+            }
+          }
        }
        return fallbackWeight.count(context);
      }